---
title: "COVID-19 Evacuation Study"
subtitle: "Visualization Code"
authors: "Courtney Page-Tan and Timothy Fraser"
output: html_notebook
---

This document provides code for replicating

This study tests whether evacuation from severe disasters led to upticks in the spread of COVID. We estimate evacuation using data from Facebook Data for Good. Creation of the dataset is described in 'replication_code.Rmd." Creation of models is described in 'modeling_code.Rmd'.

This document directly follows 'modeling_code.Rmd' and provides code for generating visualizations and validation tests.



# 7. Simulating Effects

## Zelig Plot for Panel Random Effects

```{r}
library(Zelig)


# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
    condition = . == 0,
    true = sort(unique(.))[2] / 2,
    false = .))) %>%
  # Rescale other predictors 
  mutate_at(vars(
    contains("evacuation"),
    positivity_rate_lag2,
    avg_rainfall, bonding, bridging, linking, 
    svi_socioeconomic, svi_household_disability, 
    svi_minority, svi_housing_tranport, workplaces_int,
    health_care_capacity_int, health_quality_int, 
    employee_muni_int, democrat_2016_int),
    funs(scale(.))) %>%
  mutate(week = factor(week))

library(plm)
z1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls")

z2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls")

la %>%
  filter(evacuation_less > 0) %>%
  summarize(low = quantile(evacuation_less, 0.2),
            medium = quantile(evacuation_less, 0.5),
            high = quantile(evacuation_less, 0.8))

out <- bind_rows(
  z1 %>%
    setx(evacuation_less = seq(from = 0, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Decreased Movement") %>%
    select(x = evacuation_less, contains("qi_"), type),
  z2 %>%
    setx(evacuation_intra_less = seq(from = 0, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Decreased Movement\nWithin Cities") %>%
    select(x = evacuation_intra_less, contains("qi_"), type),
  z2 %>%
    setx(evacuation_inter_less = seq(from = 0, to = 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Decreased Movement\nBetween Cities") %>%
    select(x = evacuation_inter_less, contains("qi_"), type)) %>%
  mutate(type = factor(type, levels = c(
    "Decreased Movement",
    "Decreased Movement\nWithin Cities",
    "Decreased Movement\nBetween Cities")))

out %>%
  ggplot(mapping = aes(x = x, y = exp(qi_ci_median),
                       ymin = exp(qi_ci_min), ymax = exp(qi_ci_max), fill = type, color = type)) +
  geom_line() +
  geom_ribbon(alpha = 0.5) +
  labs(x = "Users Sheltering-in-Place per 1000 residents (Decreased Movement)",
       y = "Expected COVID-19 Test Positivity Rate\n(with simulated 95% Confidence Interval)") +
  facet_wrap(~type) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  guides(fill = FALSE, color = FALSE) +
  scale_color_manual(values = c("#785EF0", "#DC267F", "#FFB000")) +
    scale_fill_manual(values = c("#785EF0", "#DC267F", "#FFB000")) +
  ggsave("viz/la_zelig.png", dpi = 500, width = 7, height = 4)
```


```{r}

# Grab cases post-October
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(case_rate, case_rate_lag2), 
            funs(if_else(
    condition = . == 0,
    true = sort(unique(.))[2] / 2,
    false = .))) %>%
  # Simpliy variable to reduce colinearity
  mutate(health_care_capacity_int = ntile(health_care_capacity_int, 4),
         health_quality_int = ntile(health_quality_int, 4),
         svi_socioeconomic = ntile(svi_socioeconomic, 4),
         svi_minority = ntile(svi_minority, 4)) %>%
  # Rescale other predictors 
  mutate_at(vars(
    contains("evacuation"),
    case_rate_lag2,
    burn_rate_int, bonding, bridging, linking, 
    svi_socioeconomic, svi_household_disability, 
      svi_minority, svi_housing_tranport, workplaces_int,
      health_care_capacity_int, health_quality_int, 
      employee_muni_int, democrat_2016_int),
      funs(scale(.))) %>%
  mutate(week = factor(week))

z1 <- ca %>%
  zelig(formula = log(case_rate) ~ 
         case_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       burn_rate +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls")

z2 <- ca %>%
  zelig(formula = log(case_rate) ~ 
        case_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       burn_rate +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls")

ca %>%
  filter(evacuation_less > 0) %>%
  summarize(low = quantile(evacuation_less, 0.2),
            medium = quantile(evacuation_less, 0.5),
            high = quantile(evacuation_less, 0.8))

out <- bind_rows(
  z1 %>%
    setx(evacuation_less = seq(from = 0, to = 4, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Decreased Movement") %>%
    select(x = evacuation_less, contains("qi_"), type),
  z2 %>%
    setx(evacuation_intra_less = seq(from = 0, to = 4, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Decreased Movement\nWithin Cities") %>%
    select(x = evacuation_intra_less, contains("qi_"), type),
  z2 %>%
    setx(evacuation_inter_less = seq(from = 0, to = 4, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Decreased Movement\nBetween Cities") %>%
    select(x = evacuation_inter_less, contains("qi_"), type)) %>%
  mutate(type = factor(type, levels = c(
    "Decreased Movement",
    "Decreased Movement\nWithin Cities",
    "Decreased Movement\nBetween Cities")))

out %>%
  ggplot(mapping = aes(x = x, y = exp(qi_ci_median),
                       ymin = exp(qi_ci_min), ymax = exp(qi_ci_max), fill = type, color = type)) +
  geom_line() +
  geom_ribbon(alpha = 0.5) +
  labs(x = "Users Sheltering-in-Place per 1000 residents (Decreased Movement)",
       y = "Expected COVID-19 Case Rate\n(with simulated 95% Confidence Interval)") +
  facet_wrap(~type) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  guides(fill = FALSE, color = FALSE) +
  scale_color_manual(values = c("#785EF0", "#DC267F", "#FFB000")) +
    scale_fill_manual(values = c("#785EF0", "#DC267F", "#FFB000")) +
  ggsave("viz/ca_zelig.png", dpi = 500, width = 7, height = 4)

```



## First differences (Matched)

### Damage

```{r}

library(Zelig)
library(tidyverse)

# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/la_disaster_match.rds")) %>%
  filter(weights > 0)


l1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

l2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
la_before = from_zelig_model(l2)$model %>%
  filter(week == "2020-10-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))
# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
la_after = from_zelig_model(l2)$model %>%
  filter(week == "2020-11-25") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))



# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
la_effect = l2 %>%
  sim(., setx(., week = "2020-10-28",
           avg_rainfall = 0,
           evacuation_intra_more = la_before$evacuation_intra_more,
           evacuation_inter_more = la_before$evacuation_inter_more,
           evacuation_intra_less = la_before$evacuation_intra_less,
           evacuation_inter_less = la_before$evacuation_inter_less,
           positivity_rate_lag2 = la_before$positivity_rate_lag2,
           workplaces_int = la_before$workplaces_int),
      setx1(., week = "2020-11-25",
            avg_rainfall = 1,
            evacuation_intra_more = la_before$evacuation_intra_more,
            evacuation_inter_more = la_before$evacuation_inter_more,
            evacuation_intra_less = la_before$evacuation_intra_less,
            evacuation_inter_less = la_before$evacuation_inter_less,
            positivity_rate_lag2 = la_before$positivity_rate_lag2)) %>%
  get_sim()

# Get california
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("case_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/ca_disaster_match.rds")) %>%
  filter(weights > 0)


c1 <- ca %>%
  zelig(formula = log(case_rate) ~ 
         case_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int + 
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

c2 <- ca %>%
  zelig(formula = log(case_rate) ~ 
        case_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int +
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
ca_before = from_zelig_model(c2)$model %>%
  filter(week == "2020-09-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))

# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
ca_after = from_zelig_model(c2)$model %>%
  filter(week == "2020-10-26") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))


# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
ca_effect = c2 %>%
  sim(., setx(., week = "2020-09-28",
           burn_rate_int = 0,
           evacuation_intra_more = ca_before$evacuation_intra_more,
           evacuation_inter_more = ca_before$evacuation_inter_more,
           evacuation_intra_less = ca_before$evacuation_intra_less,
           evacuation_inter_less = ca_before$evacuation_inter_less,
           case_rate_lag2 = ca_before$case_rate_lag2,
           workplaces_int = ca_before$workplaces_int),
      setx1(., week = "2020-10-26",
            burn_rate_int = 1,
            evacuation_intra_more = ca_before$evacuation_intra_more,
            evacuation_inter_more = ca_before$evacuation_inter_more,
            evacuation_intra_less = ca_before$evacuation_intra_less,
            evacuation_inter_less = ca_before$evacuation_inter_less,
            case_rate_lag2 = ca_before$case_rate_lag2)) %>%
  get_sim()


fd_dis <- bind_rows(
  ca_effect %>% 
    mutate(case = "Glass Fire\n(California)"),
  la_effect %>%
    mutate(case = "Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = "Disaster Damage")


```

### Shelter-in-Place (Within)

```{r}

library(Zelig)
library(tidyverse)

# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/la_shelter_match.rds")) %>%
  filter(weights > 0)


l1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

l2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
la_before = from_zelig_model(l2)$model %>%
  filter(week == "2020-10-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))
# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
la_after = from_zelig_model(l2)$model %>%
  filter(week == "2020-11-25") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))



# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
la_effect = l2 %>%
  sim(., setx(., week = "2020-10-28",
           avg_rainfall = la_before$avg_rainfall,
           evacuation_intra_more = la_before$evacuation_intra_more,
           evacuation_inter_more = la_before$evacuation_inter_more,
           evacuation_intra_less = 0,
           evacuation_inter_less = la_before$evacuation_inter_less,
           positivity_rate_lag2 = la_before$positivity_rate_lag2,
           workplaces_int = la_before$workplaces_int),
      setx1(., week = "2020-11-25",
            avg_rainfall = la_before$avg_rainfall,
            evacuation_intra_more = la_before$evacuation_intra_more,
            evacuation_inter_more = la_before$evacuation_inter_more,
            evacuation_intra_less = 1,
            evacuation_inter_less = la_before$evacuation_inter_less,
            positivity_rate_lag2 = la_before$positivity_rate_lag2)) %>%
  get_sim() 

# Get california
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("case_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/ca_shelter_match.rds")) %>%
  filter(weights > 0)


c1 <- ca %>%
  zelig(formula = log(case_rate) ~ 
         case_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int + 
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

c2 <- ca %>%
  zelig(formula = log(case_rate) ~ 
        case_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int +
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
ca_before = from_zelig_model(c2)$model %>%
  filter(week == "2020-09-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))

# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
ca_after = from_zelig_model(c2)$model %>%
  filter(week == "2020-10-26") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))


# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
ca_effect = c2 %>%
  sim(., setx(., week = "2020-09-28",
           burn_rate_int = ca_before$burn_rate_int,
           evacuation_intra_more = ca_before$evacuation_intra_more,
           evacuation_inter_more = ca_before$evacuation_inter_more,
           evacuation_intra_less = 0,
           evacuation_inter_less = ca_before$evacuation_inter_less,
           case_rate_lag2 = ca_before$case_rate_lag2,
           workplaces_int = ca_before$workplaces_int),
      setx1(., week = "2020-10-26",
            burn_rate_int = ca_before$burn_rate_int,
            evacuation_intra_more = ca_before$evacuation_intra_more,
            evacuation_inter_more = ca_before$evacuation_inter_more,
            evacuation_intra_less = 1,
            evacuation_inter_less = ca_before$evacuation_inter_less,
            case_rate_lag2 = ca_before$case_rate_lag2)) %>%
  get_sim()

fd_shelter <- bind_rows(
  ca_effect %>% 
    mutate(case = "Glass Fire\n(California)"),
  la_effect %>%
    mutate(case = "Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = "Shelter-in-Place\n(Within Cities)")
```

### Shelter-in-Place (Between)

```{r}

library(Zelig)
library(tidyverse)

# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/la_shelter_match.rds")) %>%
  filter(weights > 0)


l1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

l2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
la_before = from_zelig_model(l2)$model %>%
  filter(week == "2020-10-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))
# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
la_after = from_zelig_model(l2)$model %>%
  filter(week == "2020-11-25") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))



# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
la_effect = l2 %>%
  sim(., setx(., week = "2020-10-28",
           avg_rainfall = la_before$avg_rainfall,
           evacuation_intra_more = la_before$evacuation_intra_more,
           evacuation_inter_more = la_before$evacuation_inter_more,
           evacuation_intra_less = la_before$evacuation_intra_less,
           evacuation_inter_less = 1,
           positivity_rate_lag2 = la_before$positivity_rate_lag2,
           workplaces_int = la_before$workplaces_int),
      setx1(., week = "2020-11-25",
            avg_rainfall = la_before$avg_rainfall,
            evacuation_intra_more = la_before$evacuation_intra_more,
            evacuation_inter_more = la_before$evacuation_inter_more,
            evacuation_intra_less = la_before$evacuation_intra_less,
            evacuation_inter_less = 1,
            positivity_rate_lag2 = la_before$positivity_rate_lag2)) %>%
  get_sim()

# Get california
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("case_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/ca_shelter_match.rds")) %>%
  filter(weights > 0)


c1 <- ca %>%
  zelig(formula = log(case_rate) ~ 
         case_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int + 
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

c2 <- ca %>%
  zelig(formula = log(case_rate) ~ 
        case_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int +
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
ca_before = from_zelig_model(c2)$model %>%
  filter(week == "2020-09-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))

# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
ca_after = from_zelig_model(c2)$model %>%
  filter(week == "2020-10-26") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))


# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
ca_effect = c2 %>%
  sim(., setx(., week = "2020-09-28",
           burn_rate_int = ca_before$burn_rate_int,
           evacuation_intra_more = ca_before$evacuation_intra_more,
           evacuation_inter_more = ca_before$evacuation_inter_more,
           evacuation_intra_less = ca_before$evacuation_intra_less,
           evacuation_inter_less = 0,
           case_rate_lag2 = ca_before$case_rate_lag2,
           workplaces_int = ca_before$workplaces_int),
      setx1(., week = "2020-10-26",
            burn_rate_int = ca_before$burn_rate_int,
            evacuation_intra_more = ca_before$evacuation_intra_more,
            evacuation_inter_more = ca_before$evacuation_inter_more,
            evacuation_intra_less = ca_before$evacuation_intra_less,
            evacuation_inter_less = 1,
            case_rate_lag2 = ca_before$case_rate_lag2)) %>%
  get_sim()


fd_shelter_b <- bind_rows(
  ca_effect %>% 
    mutate(case = "Glass Fire\n(California)"),
  la_effect %>%
    mutate(case = "Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = "Shelter-in-Place\n(Between Cities)")
```

### Evacuation (Within)

```{r}

library(Zelig)
library(tidyverse)

# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/la_evac_match.rds")) %>%
  filter(weights > 0)


l1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

l2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
la_before = from_zelig_model(l2)$model %>%
  filter(week == "2020-10-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))
# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
la_after = from_zelig_model(l2)$model %>%
  filter(week == "2020-11-25") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))



# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
la_effect = l2 %>%
  sim(., setx(., week = "2020-10-28",
           avg_rainfall = la_before$avg_rainfall,
           evacuation_intra_more = 0,
           evacuation_inter_more = la_before$evacuation_inter_more,
           evacuation_intra_less = la_before$evacuation_intra_less,
           evacuation_inter_less = la_before$evacuation_inter_less,
           positivity_rate_lag2 = la_before$positivity_rate_lag2,
           workplaces_int = la_before$workplaces_int),
      setx1(., week = "2020-11-25",
            avg_rainfall = la_before$avg_rainfall,
            evacuation_intra_more = 1,
            evacuation_inter_more = la_before$evacuation_inter_more,
            evacuation_intra_less = la_before$evacuation_intra_less,
            evacuation_inter_less = la_before$evacuation_inter_less,
            positivity_rate_lag2 = la_before$positivity_rate_lag2)) %>%
  get_sim()

# Get california
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("case_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/ca_evac_match.rds")) %>%
  filter(weights > 0)


c1 <- ca %>%
  zelig(formula = log(case_rate) ~ 
         case_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int + 
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

c2 <- ca %>%
  zelig(formula = log(case_rate) ~ 
        case_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int +
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
ca_before = from_zelig_model(c2)$model %>%
  filter(week == "2020-09-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))

# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
ca_after = from_zelig_model(c2)$model %>%
  filter(week == "2020-10-26") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))


# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
ca_effect = c2 %>%
  sim(., setx(., week = "2020-09-28",
           burn_rate_int = ca_before$burn_rate_int,
           evacuation_intra_more = 0,
           evacuation_inter_more = ca_before$evacuation_inter_more,
           evacuation_intra_less = ca_before$evacuation_intra_less,
           evacuation_inter_less = ca_before$evacuation_inter_less,
           case_rate_lag2 = ca_before$case_rate_lag2,
           workplaces_int = ca_before$workplaces_int),
      setx1(., week = "2020-10-26",
            burn_rate_int = ca_before$burn_rate_int,
            evacuation_intra_more = 1,
            evacuation_inter_more = ca_before$evacuation_inter_more,
            evacuation_intra_less = ca_before$evacuation_intra_less,
            evacuation_inter_less = ca_before$evacuation_inter_less,
            case_rate_lag2 = ca_before$case_rate_lag2)) %>%
  get_sim()


fd_evac <- bind_rows(
  ca_effect %>% 
    mutate(case = "Glass Fire\n(California)"),
  la_effect %>%
    mutate(case = "Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = "Evacuation\n(Within Cities)")
```


### Evacuation (Between)

```{r}

library(Zelig)
library(tidyverse)

# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/la_evac_match.rds")) %>%
  filter(weights > 0)


l1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

l2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
la_before = from_zelig_model(l2)$model %>%
  filter(week == "2020-10-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))
# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
la_after = from_zelig_model(l2)$model %>%
  filter(week == "2020-11-25") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))



# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
la_effect = l2 %>%
  sim(., setx(., week = "2020-10-28",
           avg_rainfall = la_before$avg_rainfall,
           evacuation_intra_more = la_before$evacuation_intra_more,
           evacuation_inter_more = 0,
           evacuation_intra_less = la_before$evacuation_intra_less,
           evacuation_inter_less = la_before$evacuation_inter_less,
           positivity_rate_lag2 = la_before$positivity_rate_lag2,
           workplaces_int = la_before$workplaces_int),
      setx1(., week = "2020-11-25",
            avg_rainfall = la_before$avg_rainfall,
            evacuation_intra_more = la_before$evacuation_intra_more,
            evacuation_inter_more = 1,
            evacuation_intra_less = la_before$evacuation_intra_less,
            evacuation_inter_less = la_before$evacuation_inter_less,
            positivity_rate_lag2 = la_before$positivity_rate_lag2)) %>%
  get_sim()

# Get california
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("case_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # We will NOT rescale the covariates this time,
  # so that we can create interpretable first differences
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/ca_evac_match.rds")) %>%
  filter(weights > 0)


c1 <- ca %>%
  zelig(formula = log(case_rate) ~ 
         case_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int + 
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

c2 <- ca %>%
  zelig(formula = log(case_rate) ~ 
        case_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       burn_rate_int +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      #svi_minority + 
        svi_housing_tranport +
      workplaces_int + health_care_capacity_int +
        #health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")

# Get the mean traits in T1 
# (At beginning of disaster, using conditions lagged from two weeks prior)
ca_before = from_zelig_model(c2)$model %>%
  filter(week == "2020-09-28") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))

# Get the mean traits in T2
# (Two weeks after disaster, using conditions lagged from two weeks ago)
ca_after = from_zelig_model(c2)$model %>%
  filter(week == "2020-10-26") %>%
  summarize_at(vars(-week), funs(mean(., na.rm = TRUE)))


# Write a quick function to extract first differences
get_sim = function(mysimulation){
  
  data.frame(x = mysimulation$sim.out$x$ev %>% unlist(),
             x1 = mysimulation$sim.out$x1$ev %>% unlist(),
             fd = mysimulation$sim.out$x1$fd %>% unlist()) %>%
    return()
}

# First, we'll simulate the change,
# holding everything constant at their original means
# Don't actually need to adjust the others,
# because the others are time-invariant
ca_effect = c2 %>%
  sim(., setx(., week = "2020-09-28",
           burn_rate_int = ca_before$burn_rate_int,
           evacuation_intra_more = ca_before$evacuation_intra_more,
           evacuation_inter_more = 0,
           evacuation_intra_less = ca_before$evacuation_intra_less,
           evacuation_inter_less = ca_before$evacuation_inter_less,
           case_rate_lag2 = ca_before$case_rate_lag2,
           workplaces_int = ca_before$workplaces_int),
      setx1(., week = "2020-10-26",
            burn_rate_int = ca_before$burn_rate_int,
            evacuation_intra_more = ca_before$evacuation_intra_more,
            evacuation_inter_more = 1,
            evacuation_intra_less = ca_before$evacuation_intra_less,
            evacuation_inter_less = ca_before$evacuation_inter_less,
            case_rate_lag2 = ca_before$case_rate_lag2)) %>%
  get_sim()


fd_evac_b <- bind_rows(
  ca_effect %>% 
    mutate(case = "Glass Fire\n(California)"),
  la_effect %>%
    mutate(case = "Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = "Evacuation\n(Between Cities)")
```


### Visualize

```{r}
# Bind first differences together
bind_rows(
  fd_dis,
  fd_evac,
  fd_evac_b,
  fd_shelter,
  fd_shelter_b) %>%
  saveRDS("raw_data/fd.rds")

remove(ca,ca_after, ca_before,
       la, la_after, la_before,
       la_effect, ca_effect,
       l1,l2,c1,c2,
       fd_dis,
       fd_evac,
       fd_evac_b,
       fd_shelter,
       fd_shelter_b)

mysim <- read_rds("raw_data/fd.rds")

viz <- bind_rows(
  mysim %>% 
    group_by(type, case) %>%
    summarize(qi_median = median(fd),
              lower = quantile(fd, probs = 0.05),
              upper = quantile(fd, probs = 0.95),
              ci = "90%"),
  mysim %>% 
    group_by(type, case) %>%
    summarize(qi_median = median(fd),
              lower = quantile(fd, probs = 0.025),
              upper = quantile(fd, probs = 0.975),
              ci = "95%"),
  mysim %>% 
    group_by(type, case) %>%
    summarize(qi_median = median(fd),
              lower = quantile(fd, probs = 0.01),
              upper = quantile(fd, probs = 0.99),
              ci = "99%"),
  mysim %>% 
    group_by(type, case) %>%
    summarize(qi_median = median(fd),
              lower = quantile(fd, probs = 0.005),
              upper = quantile(fd, probs = 0.995),
              ci = "99.9%")) %>%
  mutate(ci = factor(ci, levels = c("99.9%", "99%", "95%", "90%") %>% rev())) %>%
  mutate(case = case %>% recode_factor(
    "Glass Fire\n(California)" = "Cases per 100,000 residents\nafter Glass Fire\n(California)",
    "Hurricane Zeta\n(Louisiana)" = "% Positive Tests\nafter Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = type %>% recode_factor(
    "Disaster Damage" = "Disaster<br>Damage <sup>3</sup>",
    "Evacuation\n(Within Cities)" = "Evacuation <sup>2</sup><br>(Within Cities)",
    "Evacuation\n(Between Cities)" = "Evacuation<br>(Between Cities)",
    "Shelter-in-Place\n(Within Cities)" = "Shelter-in-Place<br>(Within Cities)",
    "Shelter-in-Place\n(Between Cities)" = "Shelter-in-Place<br>(Between Cities)")) %>%
  mutate(type = factor(type, levels = levels(type)))

library(ggtext)

viz %>%
  ggplot(mapping = aes(x = type, y = qi_median, 
                       ymin = lower, ymax = upper, color = ci)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
  geom_linerange(size = 3, alpha = 0.5) +
  geom_point(size = 4, color = "black") +
  scale_color_grey(start = 0, end = 0.75) +
  facet_grid(~case, scales = "free") +
  theme_classic(base_size = 14) +
  coord_flip() +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.title = element_markdown(size = 12),
        axis.text.y = element_markdown(),
        plot.caption = element_markdown(size = 11, hjust = 0)) +
  labs(
    x = NULL, y = "COVID-19 Spread Rate",
    color = "Simulated<br>Confidence<br>Interval <sup>1</sup>",
       caption = "<sup>1</sup> Based on 1000 simulations in the Zelig package in R,<br>holding all other variables at their mean at the start of the disaster.<br>Treatment variable increased from 0 at start of disaster to 1 four weeks later.<<br><br><sup>2</sup> Evacuation variables measured as a change in movement per 1000 residents<br>by 1 person compared to baseline conditions.<br><br><sup>3</sup> Disaster damaged measured as an additional percentage point of land area burned,<br>or an additional millimeter of rainfall per week.") +
  ggsave("viz/fd.png", dpi = 500, width = 8, height = 6)

```


## Zelig

```{r}
library(Zelig)
library(tidyverse)


# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
    condition = . == 0,
    true = sort(unique(.))[2] / 2,
    false = .))) %>%
  # Rescale other predictors 
  mutate(week = factor(week)) %>%
  left_join(by = c("csub"), 
            y = read_rds("raw_data/la_shelter_match.rds")) %>%
  filter(weights > 0)

# Get models
l1 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
         positivity_rate_lag2 +
        evacuation_more + 
         evacuation_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int +health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weightw = "weights")

l2 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
        positivity_rate_lag2 +
        evacuation_intra_more + evacuation_inter_more +
        evacuation_intra_less + evacuation_inter_less +
       avg_rainfall +
      bonding + bridging + linking + 
      svi_socioeconomic + svi_household_disability + 
      svi_minority + svi_housing_tranport +
      workplaces_int + health_care_capacity_int + health_quality_int + 
      employee_muni_int + democrat_2016_int + week,
      model = "ls", weights = "weights")
# Check realistic values
la %>%
  filter(evacuation_less > 0) %>%
  summarize(low = quantile(evacuation_less, 0.2),
            medium = quantile(evacuation_less, 0.5),
            high = quantile(evacuation_less, 0.8))

out1 <- bind_rows(
  l2 %>%
    setx(evacuation_intra_less = seq(from = 0, to = 10, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Decreased Movement\nWithin Cities") %>%
    dplyr::select(x = evacuation_intra_less, ev = expected_value, type),
  l2 %>%
    setx(evacuation_inter_less = seq(from = 0, to = 10, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    mutate(type = "Decreased Movement\nBetween Cities") %>%
    dplyr::select(x = evacuation_inter_less, ev = expected_value, type)) %>%
  mutate(type = factor(type, levels = c(
    "Decreased Movement\nWithin Cities",
    "Decreased Movement\nBetween Cities")))
```


```{r, message = FALSE, warning = FALSE}
# Grab cases post-October
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(case_rate, case_rate_lag2), 
            funs(if_else(
    condition = . == 0,
    true = sort(unique(.))[2] / 2,
    false = .))) %>%
  # Simpliy variable to reduce colinearity
  mutate(health_care_capacity_int = ntile(health_care_capacity_int, 4),
         health_quality_int = ntile(health_quality_int, 4),
         svi_socioeconomic = ntile(svi_socioeconomic, 4),
         svi_minority = ntile(svi_minority, 4)) %>%
   left_join(by = c("csub"), 
             y = read_rds("raw_data/ca_shelter_match.rds")) %>%
  filter(weights > 0) %>%
  mutate(week = factor(week)) 

#remotes::install_github('IQSS/ZeligMultilevel')

library(Zelig)
library(ZeligMultilevel)

get_mixed = function(value){
  print(round(value, 2))
  
  within <- zlsmixed$new()
  within <- ca %>%
    zelig(formula = log(case_rate) ~ 
            case_rate_lag2 +
            evacuation_intra_more + 
            evacuation_inter_more +
            evacuation_intra_less +
            evacuation_inter_less +
            burn_rate_int +
            bonding + bridging + linking + 
            svi_socioeconomic + svi_household_disability + 
            #svi_minority + 
            svi_housing_tranport +
            workplaces_int + health_care_capacity_int + #health_quality_int + 
            employee_muni_int + democrat_2016_int + 
            (1 | week), cite = FALSE,
          model = "ls.mixed", weights = "weights")
  
  within$setx(evacuation_intra_less = value)
  within$sim(num = 1000)
  
  
  between <- zlsmixed$new()
  between <- ca %>%
    zelig(formula = log(case_rate) ~ 
            case_rate_lag2 +
            evacuation_intra_more + 
            evacuation_inter_more +
            evacuation_intra_less +
            evacuation_inter_less +
            burn_rate_int +
            bonding + bridging + linking + 
            svi_socioeconomic + svi_household_disability + 
            #svi_minority + 
            svi_housing_tranport +
            workplaces_int + health_care_capacity_int + #health_quality_int + 
            employee_muni_int + democrat_2016_int + 
            (1 | week), cite = FALSE,
          model = "ls.mixed", weights = "weights")
  
  between$setx(evacuation_inter_less = value)
  between$sim(num = 1000)
  
  bind_rows(
    data.frame(ev = within$sim.out[[1]]$ev %>% unlist(),
               type = "Decreased Movement\nWithin Cities"),
    data.frame(ev = between$sim.out[[1]]$ev %>% unlist(),
               type = "Decreased Movement\nBetween Cities")) %>%
    return()
}

# Check realistic values
ca %>%
  filter(evacuation_less > 0) %>%
  summarize(low = quantile(evacuation_less, 0.2),
            medium = quantile(evacuation_less, 0.5),
            high = quantile(evacuation_less, 0.8))

# Run a loop to get expected values
out2 <- data.frame(value = seq(from = 0, to = 10, length.out = 30)) %>%
  split(.$value) %>%
  map_dfr(~get_mixed(.$value), .id = "value")

```


```{r}
bind_rows(out1 %>% 
            mutate(model = "Hurricane Zeta"),
          out2 %>% 
            rename(x = value) %>% mutate(x = as.numeric(x)) %>%
            mutate(model = "Glass Fire")) %>%
  saveRDS("raw_data/ev.rds")
remove(out1, out2)
rm(list = ls())
```

```{r}
library(tidyverse)

mysim <- read_rds("raw_data/ev.rds") %>%
  rename(case = model)

out <- bind_rows(
  mysim %>% 
    group_by(type, case, x) %>%
    summarize(qi_median = median(ev),
              lower = quantile(ev, probs = 0.05),
              upper = quantile(ev, probs = 0.95),
              ci = "90%"),
  mysim %>% 
    group_by(type, case, x) %>%
    summarize(qi_median = median(ev),
              lower = quantile(ev, probs = 0.025),
              upper = quantile(ev, probs = 0.975),
              ci = "95%"),
  mysim %>% 
    group_by(type, case, x) %>%
    summarize(qi_median = median(ev),
              lower = quantile(ev, probs = 0.01),
              upper = quantile(ev, probs = 0.99),
              ci = "99%"),
  mysim %>% 
    group_by(type, case, x) %>%
    summarize(qi_median = median(ev),
              lower = quantile(ev, probs = 0.005),
              upper = quantile(ev, probs = 0.995),
              ci = "99.9%")) %>%
  dplyr::select(type, case, x, qi_median, lower, upper) %>%
  pivot_longer(cols = c(lower, upper), names_to = "whatever", values_to ="ev") %>%
  dplyr::select(-whatever)  %>%
  arrange(ev) %>%
  group_by(type, case, x) %>%
  arrange(ev) %>%
  mutate(order = paste("rank", 1:n(), sep = "")) %>%
    ungroup() %>%
  pivot_wider(id_cols = c(type, case, x, qi_median), 
              names_from = order, values_from = ev)  

remove(mysim)

viz <- bind_rows(
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank1,
                  upper = rank2) %>%
    mutate(ci = "99.9%",
                  set = "lower"),
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank2,
                  upper = rank3) %>%
                  mutate(ci = "99%",
                  set = "lower"),
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank3,
                  upper = rank4) %>%
    mutate(ci = "95%",
                  set = "lower"),
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank4,
                  upper = rank5) %>%
                  mutate(ci = "90%",
                  set = "lower"),
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank5,
                  upper = rank6) %>%
                  mutate(ci = "95%",
                  set = "upper"),
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank6,
                  upper = rank7) %>%
                  mutate(ci = "99%",
                  set = "upper"),
  out %>% 
    dplyr::select(type,case,x,qi_median,
                  lower = rank7,
                  upper = rank8) %>% 
                  mutate(ci = "99.9%",
                  set = "upper"))


viz %>%
  mutate(case = case %>% recode_factor(
    "Glass Fire" = "COVID-19 Case Rate\nper 1,000 residents\nafter Glass Fire\n(California)",
    "Hurricane Zeta" = "% Tests Positive\nafter Hurricane Zeta\n(Louisiana)")) %>%
  mutate(type = factor(type, levels = c("Decreased Movement\nWithin Cities",
                                        "Decreased Movement\nBetween Cities"))) %>%
  mutate(ci = factor(ci, levels = c("90%", "95%", "99%", "99.9%"))) %>%
  mutate(group = paste(set, ci, type, case)) %>%
  ggplot(mapping = aes(x = x, y = exp(qi_median), 
                       ymin = exp(lower), ymax = exp(upper), fill = ci, group = group)) +
  geom_ribbon(color = "white") +
  geom_line(color = "white", linetype = "dashed") +
  viridis::scale_fill_viridis(option = "plasma", discrete = TRUE,
                              begin = 0.2, end = 0.8) +
  scale_y_log10() +
  facet_wrap(type~case, scales = "free", ncol = 4) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm")) +
  labs(x = "# of Facebook Users who Sheltered in Place per 1,000 residents",
       y = "Expected COVID-19 Spread Rate\n(y-axis on a log-scale)", fill = "Simulated\nConfidence\nInterval") +
  ggsave("viz/expected_effects.png", dpi = 500, width = 10, height = 6)
```



## DiD Visualization

### Data and Models

```{r}

# Grab cases post-October
la <- read_rds("raw_data/la_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # Rescale other predictors 
  mutate_at(vars(
    #contains("evacuation"),
    #avg_rainfall, 
    positivity_rate_lag2,
    bonding, bridging, linking, 
    svi_socioeconomic, svi_household_disability, 
    svi_minority, svi_housing_tranport, workplaces_int,
    health_care_capacity_int, health_quality_int, 
    employee_muni_int, democrat_2016_int),
    funs(scale(.))) %>%
  # Join in Coarsened Exact Matching weights, and zoom into key sample
  left_join(by = c("csub"), y = read_rds("raw_data/la_shelter_match.rds")) %>%
  filter(weights > 0) %>%
  as.data.frame() %>%
  # Let's clarify the counter variable
  group_by(csub) %>%
  arrange(week) %>%
  mutate(counter = 1:n()) %>%
  ungroup() %>%
  mutate(week = factor(week))


l3 <- la %>%
  zelig(formula = log(positivity_rate) ~ 
          positivity_rate_lag2 + counter +
          evacuation_intra_more * counter +
          evacuation_inter_more * counter +
          evacuation_intra_less * counter + 
          evacuation_inter_less * counter +
          avg_rainfall * counter +
          bonding + bridging + linking + 
          svi_socioeconomic + svi_household_disability + 
          svi_minority + svi_housing_tranport +
          workplaces_int + health_care_capacity_int + health_quality_int + 
          employee_muni_int + democrat_2016_int - counter + week, 
        model = "ls", weights = "weights")

# Grab cases post-October
ca <- read_rds("raw_data/ca_dataset.rds") %>%
  # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(case_rate, case_rate_lag2), 
            funs(if_else(
              condition = . == 0,
              true = sort(unique(.))[2] / 2,
              false = .))) %>%
  # Simpliy variable to reduce colinearity
  mutate(health_care_capacity_int = ntile(health_care_capacity_int, 4),
         health_quality_int = ntile(health_quality_int, 4),
         svi_socioeconomic = ntile(svi_socioeconomic, 4),
         svi_minority = ntile(svi_minority, 4)) %>%
  # Rescale other predictors 
  mutate_at(vars(
    #contains("evacuation"),
    case_rate_lag2,
    #burn_rate_int, 
    bonding, bridging, linking, 
    svi_socioeconomic, svi_household_disability, 
    svi_minority, svi_housing_tranport, workplaces_int,
    health_care_capacity_int, health_quality_int, 
    employee_muni_int, democrat_2016_int),
    funs(scale(.))) %>%
  # Join in Coarsened Exact Matching weights, and zoom into key sample
  left_join(by = c("csub"), y = read_rds("raw_data/ca_shelter_match.rds")) %>%
  filter(weights > 0) %>%
  as.data.frame() %>%
  # Let's clarify the counter variable
  group_by(csub) %>%
  arrange(week) %>%
  mutate(counter = 1:n()) %>%
  ungroup() %>%
  mutate(week = factor(week))



c6 <- ca %>%
  zelig(formula = log(case_rate) ~ 
          case_rate_lag2 + counter +
          evacuation_intra_more * counter + 
          evacuation_inter_more * counter +
          evacuation_intra_less * counter + 
          evacuation_inter_less * counter +
          burn_rate_int * counter +
          bonding + bridging + linking + 
          svi_socioeconomic + svi_household_disability + 
          #svi_minority + 
          svi_housing_tranport +
          workplaces_int + health_care_capacity_int + #health_quality_int + 
          employee_muni_int + democrat_2016_int - counter + week, 
        model = "ls", weights = "weights")
```

### Simulation

```{r}
#%>% unique() %>% sort()
# I want to simulate the expected effect of decreasing evacuation
# assuming everything else stayed the same...
lout <- bind_rows(
  # Treatment Group
  l3 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 12), rep(10, 1), rep(0, 6)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(la$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (1 week)"),
  
  # Treatment Group
  l3 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 12), rep(10, 2), rep(0, 5)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(la$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (2 weeks)"),
  
  
  # Treatment Group
  l3 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 12), rep(10, 3), rep(0, 4)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(la$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (3 weeks)"),
  
   # Treatment Group
  l3 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 12), rep(10, 4), rep(0, 3)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(la$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (4 weeks)"),
   # Treatment Group
  l3 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 12), rep(10, 5), rep(0, 2)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(la$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (5 weeks)"),
  
  # Control Group
  l3 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = 0,
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(la$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Control"))




# I want to simulate the expected effect of decreasing evacuation
# assuming everything else stayed the same...
cout <- bind_rows(
  # Treatment Group
  c6 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 9 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 9), rep(10, 1), rep(0, 9)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(ca$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (1 week)"),
  
  # Treatment Group
  c6 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 9), rep(10, 2), rep(0, 8)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(ca$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (2 weeks)"),
  
  
  # Treatment Group
  c6 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 9), rep(10, 3), rep(0, 7)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(ca$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (3 weeks)"),
  
   
  # Treatment Group
  c6 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 9), rep(10, 4), rep(0, 6)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(ca$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (4 weeks)"),
  
    
  # Treatment Group
  c6 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = c(rep(0, 9), rep(10, 5), rep(0, 5)),
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(ca$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Treated (5 weeks)"),
  
  # Control Group
  c6 %>%
    setx(
      # For Louisiana, simulate having no evacuation until the week of the disaster
      # remember, we're looking at week 13 as two weeks after the disaster, 
      # when we would first see the effect of evacuation two weeks prior
      evacuation_intra_less = 0,
      
      evacuation_intra_more = 0,
      evacuation_inter_more = 0,
      evacuation_inter_less = 0,
      counter = 1:19,
      week = levels(ca$week)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "Control"))
```
```{r}

out <- bind_rows(lout,cout, .id = "model") %>%
  mutate(case = model %>% recode_factor(
    "1" = "(%) Test Positivity Rate\nHurricane Zeta\n(Louisiana)",
    "2" = "Case Rate\nper 100,000 residents\nGlass Fire\n(California)")) %>%
  #mutate(week = as.numeric(week)) %>%
  mutate(oneweek = if_else(model == 1, levels(la$week)[11], levels(ca$week)[8])) %>%
  mutate(twoweeks = if_else(model == 1, levels(la$week)[12], levels(ca$week)[9])) %>%
  mutate(threeweeks = if_else(model == 1, levels(la$week)[13], levels(ca$week)[10])) %>%
  mutate(fourweeks = if_else(model == 1, levels(la$week)[14], levels(ca$week)[11])) %>%
  mutate(disaster = if_else(model == 1, levels(la$week)[10], levels(ca$week)[7]))  %>%
  dplyr::select(week, type, model, case, disaster, oneweek, twoweeks, threeweeks,fourweeks, contains("qi_"))

viz <- bind_rows(
  out %>%
    filter(type %in% c("Treated (1 week)", "Control")) %>%
    mutate(class = "Treated (1 week)"),
  out %>%
    filter(type %in% c("Treated (2 weeks)", "Control")) %>%
    mutate(class = "Treated (2 weeks)"),
  out %>%
    filter(type %in% c("Treated (3 weeks)", "Control")) %>%
    mutate(class = "Treated (3 weeks)"),
  
  out %>%
    filter(type %in% c("Treated (4 weeks)", "Control")) %>%
    mutate(class = "Treated (4 weeks)"),
  
  out %>%
    filter(type %in% c("Treated (5 weeks)", "Control")) %>%
    mutate(class = "Treated (5 weeks)")
  ) %>%
    mutate(linetype = if_else(str_detect(type, "Treated"), "Treated", "Control")) %>%
  mutate(class = factor(class, levels = c(
    "Treated (1 week)", "Treated (2 weeks)", "Treated (3 weeks)",
    "Treated (4 weeks)", "Treated (5 weeks)"))) %>%
  mutate(week = as.Date(week)) %>%
  filter(!class %in% c("Treated (5 weeks)"))  %>%
  filter(class != "Treated (4 weeks)")


mylines <- viz %>%
  filter(type != "Control") %>%
  select(case, class,oneweek, twoweeks, threeweeks, fourweeks, disaster) %>%
  distinct() %>%
  pivot_longer(cols = c(oneweek, twoweeks, threeweeks, fourweeks,disaster), 
               names_to = "variable", values_to = "week") %>%
  mutate(week = as.Date(week)) %>%
  mutate(linetype = variable %>% dplyr::recode_factor(
    "oneweek" = "Treated (1 week)",
    "twoweeks" = "Treated (2 weeks)",
    "threeweeks" = "Treated (3 weeks)",
    "fourweeks" = "Treated (4 weeks)",
    "disaster" = "Week of Disaster"))  %>%
  filter(as.character(class) == as.character(linetype) | 
           as.character(linetype) == "Week of Disaster") %>%
  filter(class != "Treated (5 weeks)") %>%
  mutate(color = variable %>% dplyr::recode_factor(
    "oneweek" = "# of Weeks since Disaster",
    "twoweeks" = "# of Weeks since Disaster",
    "threeweeks" = "# of Weeks since Disaster",
    "fourweeks" = "# of Weeks since Disaster",
    "disaster" = "Week of Disaster")) 

mybands <- mylines %>%
  mutate(variable = if_else(variable == "disaster", "disaster", "intercept")) %>%
  select(case,class, variable, week) %>%
  distinct() %>%
  pivot_wider(id_cols = c(case, class), names_from = variable, values_from = week) %>%
  mutate(group = 1) %>%
  mutate(ymin = if_else(str_detect(case, "Hurricane Zeta"), 0, 0),
         ymax = if_else(str_detect(case, "Hurricane Zeta"), 60, 40000)) %>%
  filter(class != "Treated (4 weeks)")

```

```{r}
appender <- function(string) str_replace(string, pattern = "Treated ", replacement = "Treated\n") %>%
  str_replace(pattern = "[(][%][)] Test ", replacement = "(%) Test\n")

g1 <- ggplot() +
  facet_grid(case~class, scales = "free_y", 
             labeller = as_labeller(appender)) +
  geom_rect(data = mybands,
            mapping = aes(xmin = disaster, xmax = intercept,
                          ymin = ymin, ymax = ymax,
                          group = group, fill = "Weeks of Mobility Change"),
            alpha = 0.10) +
  geom_linerange(data = mybands, 
             mapping = aes(x = disaster,
                           ymin = ymin, ymax = ymax,
                           group = group, color = "Week of Disaster"),
              alpha = 0.9, size = 1.25) +
  # Keep everything after the disaster OR control
  geom_ribbon(data = viz %>%
                filter(week >= disaster | linetype == "Control"), 
              mapping = aes(x = week, y = exp(qi_ci_median),
                            ymin = exp(qi_ci_min), ymax = exp(qi_ci_max),
                            fill = linetype,
                            group = linetype, linetype = linetype),
              alpha = 0.30, color = NA) +
  geom_line(data = viz, 
            mapping = aes(x = week, y = exp(qi_ci_median),
                          ymin = exp(qi_ci_min), ymax = exp(qi_ci_max),
                          group = linetype, linetype = linetype)) +
  scale_color_manual(
    breaks = c("Week of Disaster"),
    labels = c("Week of\nDisaster"),
    values = c("darkgrey"), guide = guide_legend(order = 2)) +
  scale_fill_manual(
    breaks = c("Treated", "Control", "Weeks of Mobility Change"),
    labels = c("Treated\n(Max = 10)",
               "Control\n(None = 0)",
               "Weeks of\nMobility Change"),
    values = c("#DC267F", "#FFB000", "#785EF0"), guide = guide_legend(order = 1)) +
  scale_y_log10() +
  theme_bw(base_size = 16) +
  theme(panel.grid = element_blank()) +
  labs(x = "Weeks (19)",color = NULL,
       y = "Expected COVID-19 Spread Rates\n(y-axis on a log-scale)",
       subtitle = "Simulated Effect of Decreases in Movement\nfor 1, 2, or 3 weeks on COVID-19 Spread Rates",
        fill = "Sheltering-in-Place\n(Decreases in Movement)")+ 
  theme(legend.position = "bottom",
        strip.text.y = element_text(angle = 0),
        plot.subtitle = element_text(hjust = 0.5)) +
  guides(linetype = "none")


ggsave(g1, filename = "viz/sim_time.png", dpi =500, width = 10, height = 8)
```

# 7. Mapping

## Map Evacuation 

```{r}
library(sf)
library(rgdal)
library(tidyverse)
library(sfnetworks)
library(tidygraph)
#install.packages("ggspatial")

# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"


# I'm going to map both our disasters, using the following layers:

# 1. Damage, as polygon fill
# 2. Source/Destination, as outline
# 3. Neighborhoods recorded, as points
# 4. Pathways as edges, width, single color

###################
# LOUISIANA 
###################

# Import network
net <- read_rds("raw_data/hurricane_zeta_la.rds") %>%
  st_transform(crs = aea) %>%
  filter(evacuation > 0)

# Identify just source and destination communities
sources <- net %>%
    mutate(from_geoid = .N()$geoid[from]) %>%
    as.data.frame() %>%
    select(geoid = from_geoid) %>%
    distinct() %>%
    mutate(source = "source")
destinations <- net %>%
    mutate(to_geoid = .N()$geoid[to]) %>%
    as.data.frame() %>%
    select(geoid = to_geoid) %>%
    distinct() %>% 
    mutate(destination = "destination")



# Let's get the damage which occurred as of the first week after the disaster
# We can see this by jumping to three weeks after the date of the disaster, 
# since disaster damage data is lagged by two weeks.
dat <- read_rds("raw_data/la_dataset.rds") %>%
  dplyr::select(csub, week, avg_rainfall, evacuation_more)  %>%
  # Disaster ran October 24 to November 2
  # We'll get the two-week total,
  # Adding together the rainfall for the following weeks
  # Oct 22-Oct 28 ("2020-10-28", lagged 2 weeks so we need "2020-11-11")
  # Oct 29-Nov 4 ("2020-11-04", lagged 2 weeks so we need "2020-11-18")
  filter(week == "2020-11-11" | week == "2020-11-18") %>%
  group_by(csub) %>%
  summarize(avg_rainfall = sum(avg_rainfall, na.rm = TRUE),
            evacuation_more = sum(evacuation_more, na.rm = TRUE)) %>%
  ungroup() %>%
  # Join in and make identifier for source/destination/both/neither towns
  left_join(by = c("csub" = "geoid"), y = sources) %>%
  left_join(by = c("csub" = "geoid"), y = destinations) %>%
  mutate(site_type = case_when(
    destination == "destination" & source != "source" ~ "Destination",
    destination == "destination" & source == "source" ~ "Source & Destination",
    destination != "destination" & source == "source" ~ "Source",
    TRUE ~ "No evacuees") %>%
      factor(levels = c("Source", "Destination", "Source & Destination", "No evacuees"))) %>%
  select(-source, -destination)

remove(sources, destinations)

# Get 5-digit fips codes for affected counties
mycounties <- read_rds("raw_data/hurricane_zeta_counties.rds")

# Get county subdivision polygons
shapes <- read_rds("shapes/csub.rds") %>%
  filter(str_sub(geoid, 1,5) %in% mycounties) %>%
  st_transform(crs = aea) %>%
  # also join in the subdivision covariate data
  left_join(by = c("geoid" = "csub"), y = dat)

# Get centroids
centroids <- net %>%
  st_as_sf("nodes") %>%
  left_join(by = c("geoid" = "csub"), y = dat) %>%
  filter(!is.na(evacuation_more)) %>%
  filter(site_type != "No evacuees")


# Get county polygons
counties <- read_rds("shapes/counties.rds") %>%
  filter(geoid %in% mycounties) %>%
  st_transform(crs = aea) 

# Get edges
edges <- net %>%
  filter(evacuation > 0) %>%
  st_as_sf("edges")

g1 <- ggplot() +
    # Set a nice grey outline to map over
  geom_sf(data = counties, fill = NA, color = "darkgrey", size = 5) +
  # Map damage proxy at county subdivision level
  geom_sf(data = shapes, mapping = aes(fill = avg_rainfall / 10), size = 0.1) +
  # Show counties
  # Add white edges to highlight pathways
  geom_sf(data = edges, mapping = aes(width = evacuation), color = "white", alpha = 0.15) +
  # Overlay county boundaries on top
  geom_sf(data = counties, fill = NA, color = "black", size = 0.5) +
  # This adds a nice white outline around the points
  geom_sf(data = centroids, mapping = aes(color = site_type,
                                          size = evacuation_more + 15), color = "white", show.legend = TRUE) +
  geom_sf(data = centroids, mapping = aes(color = site_type,
                                          size = evacuation_more)) +
  theme_void(base_size = 14) +
  viridis::scale_fill_viridis(option = "plasma") +
  scale_color_manual(values = c("#648FFF", "black")) +
  theme(legend.position = "right",
        plot.subtitle = element_text(hjust = 0.5),
        plot.margin = margin(0,0,0,0, unit = "in")) +
  guides(fill = guide_colorbar(barwidth = 1, barheight = 5)) +
  labs(fill = "Total\nRainfall (mm)\nOct. 22 to\nNov. 4, 2020",
       color = "Cities Tracked",
       size = "Total Evacuees\nper 1,000 residents",
       subtitle = "Hurricane Zeta\nSoutheastern Louisiana") +
  scale_size_continuous(breaks = c(0, 25, 50, 75, 100)) +
  guides(size = FALSE, color = FALSE) +
  ggspatial::annotation_scale(location = "bl", pad_x = unit(0.5, "in")) +
  ggspatial::annotation_north_arrow(
    which_north = "true",
    height = unit(0.25, "in"),
    width = unit(0.25, "in"),
    location = "bl") 

###################
# Glass Fire 
###################

# Import network
net <- read_rds("raw_data/glass_fire_ca.rds") %>%
  st_transform(crs = aea) %>%
  filter(evacuation > 0)

# Identify just source and destination communities
sources <- net %>%
    mutate(from_geoid = .N()$geoid[from]) %>%
    as.data.frame() %>%
    select(geoid = from_geoid) %>%
    distinct() %>%
    mutate(source = "source")
destinations <- net %>%
    mutate(to_geoid = .N()$geoid[to]) %>%
    as.data.frame() %>%
    select(geoid = to_geoid) %>%
    distinct() %>% 
    mutate(destination = "destination")



# Let's get the damage which occurred as of the first week after the disaster
# We can see this by jumping to three weeks after the date of the disaster, 
# since disaster damage data is lagged by two weeks.
dat <- read_rds("raw_data/ca_dataset.rds") %>%
  dplyr::select(csub, week, burn_rate_int, evacuation_more) %>%
  # Disaster ran Sept 27 - Oct. 20, but most of the heaviest damage occurred by Oct 7
  # We'll get the two-week total,
  # Adding together the approximate percentage burned for the following weeks
  # Sept 28-Oct 04 ("2020-10-04", lagged two weeks so we need "2020-10-19")
  # Oct 04 - Oct 11 ("2020-10-12", lagged two weeks so we need "2020-10-26")
  filter(week == "2020-10-19" | week == "2020-10-26") %>%
  group_by(csub) %>%
  summarize(burn_rate_int = sum(burn_rate_int, na.rm = TRUE),
            evacuation_more = sum(evacuation_more, na.rm = TRUE)) %>%
  ungroup() %>%
  # Join in and make identifier for source/destination/both/neither towns
  left_join(by = c("csub" = "geoid"), y = sources) %>%
  left_join(by = c("csub" = "geoid"), y = destinations) %>%
  mutate(site_type = case_when(
    destination == "destination" & source != "source" ~ "Destination",
    destination == "destination" & source == "source" ~ "Source & Destination",
    destination != "destination" & source == "source" ~ "Source",
    TRUE ~ "No evacuees") %>%
      factor(levels = c("Source", "Destination", "Source & Destination", "No evacuees"))) %>%
  select(-source, -destination)

remove(sources, destinations)

# Get 5-digit fips codes for affected counties
mycounties <- read_rds("raw_data/glass_fire_counties.rds")

# Get county subdivision polygons
shapes <- read_rds("shapes/csub.rds") %>%
  filter(str_sub(geoid, 1,5) %in% mycounties) %>%
  st_transform(crs = aea) %>%
  # also join in the subdivision covariate data
  left_join(by = c("geoid" = "csub"), y = dat)

# Get centroids
centroids <- net %>%
  st_as_sf("nodes") %>%
  left_join(by = c("geoid" = "csub"), y = dat) %>%
  filter(!is.na(evacuation_more)) %>%
  filter(site_type != "No evacuees")


# Get county polygons
counties <- read_rds("shapes/counties.rds") %>%
  filter(geoid %in% mycounties) %>%
  st_transform(crs = aea) 

# Get edges
edges <- net %>%
  filter(evacuation > 0) %>%
  st_as_sf("edges")

g2 <- ggplot() +
  # Set a nice grey outline to map over
  geom_sf(data = counties, fill = NA, color = "darkgrey", size = 5) +
  # Map damage proxy at county subdivision level
  geom_sf(data = shapes, mapping = aes(fill = burn_rate_int), size = 0.1) +
  # Show counties
  # Add white edges to highlight pathways
  geom_sf(data = edges, mapping = aes(width = evacuation), color = "white", alpha = 0.15) +
  # Overlay county boundaries on top
  geom_sf(data = counties, fill = NA, color = "black", size = 0.5) +
  # This adds a nice white outline around the points
  geom_sf(data = centroids, mapping = aes(color = site_type,
                                          size = evacuation_more + 15), color = "white", show.legend = TRUE) +
  geom_sf(data = centroids, mapping = aes(color = site_type,
                                          size = evacuation_more)) +
  theme_void(base_size = 14) +
  viridis::scale_fill_viridis(option = "plasma") +
  scale_color_manual(values = c("#648FFF", "black")) +
  theme(legend.position = "right",
        plot.subtitle = element_text(hjust = 0.5),
        plot.margin = margin(0,0,0,0, unit = "in")) +
  guides(fill = guide_colorbar(barwidth = 1, barheight = 5)) +
  labs(fill = "% Estimated\nArea Burned\nSept. 28 to\nOct. 12, 2020",
       color = "Cities Tracked",
       size = "Total Evacuees\nper 1,000 residents",
       subtitle = "Glass Fire\nCentral California") +
   scale_size_continuous(breaks = c(0, 25, 50, 75, 100)) +
 ggspatial::annotation_scale(location = "bl", pad_x = unit(0.5, "in")) +
  ggspatial::annotation_north_arrow(
    which_north = "true",
    height = unit(0.25, "in"),
    width = unit(0.25, "in"),
    location = "bl") 

ggpubr::ggarrange(g1,g2, legend = "right") +
  theme(legend.box = "vertical") +
  ggsave("viz/evac_map.png", dpi = 500, width = 12, height = 6)

rm(list = ls())
```

## Hero Image

```{r}
library(sf)
library(rgdal)
library(tidyverse)
library(sfnetworks)
library(tidygraph)
#install.packages("ggspatial")

# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"


# I'm going to map both our disasters, using the following layers:

# 1. Damage, as polygon fill
# 2. Source/Destination, as outline
# 3. Neighborhoods recorded, as points
# 4. Pathways as edges, width, single color

###################
# LOUISIANA 
###################

# Import network
net <- read_rds("raw_data/hurricane_zeta_la.rds") %>%
  st_transform(crs = aea) %>%
  filter(evacuation > 0)

# Identify just source and destination communities
sources <- net %>%
    mutate(from_geoid = .N()$geoid[from]) %>%
    as.data.frame() %>%
    select(geoid = from_geoid) %>%
    distinct() %>%
    mutate(source = "source")
destinations <- net %>%
    mutate(to_geoid = .N()$geoid[to]) %>%
    as.data.frame() %>%
    select(geoid = to_geoid) %>%
    distinct() %>% 
    mutate(destination = "destination")



# Let's get the damage which occurred as of the first week after the disaster
# We can see this by jumping to three weeks after the date of the disaster, 
# since disaster damage data is lagged by two weeks.
dat <- read_rds("raw_data/la_dataset.rds") %>%
  dplyr::select(csub, week, avg_rainfall, evacuation_more)  %>%
  # Disaster ran October 24 to November 2
  # We'll get the two-week total,
  # Adding together the rainfall for the following weeks
  # Oct 22-Oct 28 ("2020-10-28", lagged 2 weeks so we need "2020-11-11")
  # Oct 29-Nov 4 ("2020-11-04", lagged 2 weeks so we need "2020-11-18")
  filter(week == "2020-11-11" | week == "2020-11-18") %>%
  group_by(csub) %>%
  summarize(avg_rainfall = sum(avg_rainfall, na.rm = TRUE),
            evacuation_more = sum(evacuation_more, na.rm = TRUE)) %>%
  ungroup() %>%
  # Join in and make identifier for source/destination/both/neither towns
  left_join(by = c("csub" = "geoid"), y = sources) %>%
  left_join(by = c("csub" = "geoid"), y = destinations) %>%
  mutate(site_type = case_when(
    destination == "destination" & source != "source" ~ "Destination",
    destination == "destination" & source == "source" ~ "Source & Destination",
    destination != "destination" & source == "source" ~ "Source",
    TRUE ~ "No evacuees") %>%
      factor(levels = c("Source", "Destination", "Source & Destination", "No evacuees"))) %>%
  select(-source, -destination)

remove(sources, destinations)

# Get 5-digit fips codes for affected counties
mycounties <- read_rds("raw_data/hurricane_zeta_counties.rds")

# Get county subdivision polygons
shapes <- read_rds("shapes/csub.rds") %>%
  filter(str_sub(geoid, 1,5) %in% mycounties) %>%
  st_transform(crs = aea) %>%
  # also join in the subdivision covariate data
  left_join(by = c("geoid" = "csub"), y = dat)

# Get centroids
centroids <- net %>%
  st_as_sf("nodes") %>%
  left_join(by = c("geoid" = "csub"), y = dat) %>%
  filter(!is.na(evacuation_more)) %>%
  filter(site_type != "No evacuees")


# Get county polygons
counties <- read_rds("shapes/counties.rds") %>%
  filter(geoid %in% mycounties) %>%
  st_transform(crs = aea) 

# Get edges
edges <- net %>%
  filter(evacuation > 0) %>%
  st_as_sf("edges")

g1 <- ggplot() +
    # Set a nice grey outline to map over
  geom_sf(data = counties, fill = NA, color = "darkgrey", size = 5) +
  # Map damage proxy at county subdivision level
  geom_sf(data = shapes, mapping = aes(fill = avg_rainfall / 10), size = 0.1) +
  # Show counties
  # Add white edges to highlight pathways
  geom_sf(data = edges, mapping = aes(width = evacuation), color = "white", alpha = 0.15) +
  # Overlay county boundaries on top
  geom_sf(data = counties, fill = NA, color = "black", size = 0.5) +
  # This adds a nice white outline around the points
  geom_sf(data = centroids, mapping = aes(color = site_type,
                                          size = evacuation_more + 15), color = "white", show.legend = TRUE) +
  geom_sf(data = centroids, mapping = aes(color = site_type,
                                          size = evacuation_more)) +
  theme_void(base_size = 14) +
  viridis::scale_fill_viridis(option = "viridis") +
  scale_color_manual(values = c("#648FFF", "black")) +
  theme(legend.position = "right",
        plot.subtitle = element_text(hjust = 0.5),
        plot.margin = margin(0,0,0,0, unit = "in")) +
  guides(fill = guide_colorbar(barwidth = 1, barheight = 5)) +
  scale_size_continuous(breaks = c(0, 25, 50, 75, 100), range = c(1,10)) +
  guides(fill = "none", color = "none", size = "none") +
  labs(subtitle = NULL) +
  coord_sf(xlim = c(430000, 620000), ylim = c(-1180000, -1080000))

ggsave(g1, filename = "viz/hero_image.png", dpi = 1000, width = 8, height = 4)
```


## Matched Cities

```{r}
library(tidyverse)
library(ggpubr)

dat <- bind_rows(
  # So, from within this affected area, let's identify some communities which experienced less rainfall than others.
  read_rds("raw_data/la_dataset.rds") %>%
    # Zooming into the week of the disaster's damage data (2 weeks after disaster),
    filter(week == "2020-11-11") %>%
    # Let's identify those above and below the median
    mutate(treat = ntile(avg_rainfall, 2) - 1) %>%
    left_join(by = c("csub"), y = read_rds("raw_data/la_disaster_match.rds")) %>%
    filter(weights > 0) %>%
    mutate(case = "Hurricane Zeta",
           type = "Matched for\nDisaster\nDamage"),
  
  # Now, let's repeat for California
  read_rds("raw_data/ca_dataset.rds") %>%
    # Zoom into the week of the disaster,
    # Since active_fires is lagged by 2 weeks, we need to zoom into 2 weeks AFTER
    # the date of the disaster ("2020-09-28"), switching to "2020-10-12"
    filter(week == "2020-10-12") %>%
    rename(treat = active_fire) %>%
    left_join(by = c("csub"), y = read_rds("raw_data/ca_disaster_match.rds")) %>%
    filter(weights > 0)  %>%
    mutate(case = "Glass Fire",
           type = "Matched for\nDisaster\nDamage"),
  
  # So, from within this affected area, let's identify some communities which experienced less rainfall than others.
  read_rds("raw_data/la_dataset.rds") %>%
    # Since each rainfall stat is lagged by 2 weeks,
    # we must zoom into TWO WEEKS AFTER THE WEEK OF THE DISASTER,
    # switching from the start of the disaster in "2020-10-28" to "2020-11-11"
    # Zooming into the  week of the disaster,
    filter(week == "2020-11-11") %>%
    # Let's identify those with increased evacuation above and below the median
    mutate(treat = if_else(evacuation_more > 0, 1, 0)) %>%
    left_join(by = c("csub"), y = read_rds("raw_data/la_evac_match.rds")) %>%
    filter(weights > 0) %>%
    mutate(case = "Hurricane Zeta",
           type = "Matched for\nEvacuation"),
  
  
  # So, from within this affected area, let's identify some communities which experienced less rainfall than others.
  read_rds("raw_data/ca_dataset.rds") %>%
    # Zoom into the week of the disaster,
    # Since active_fires is lagged by 2 weeks, we need to zoom into 2 weeks AFTER
    # the date of the disaster ("2020-09-28"), switching to "2020-10-12"
    filter(week == "2020-10-12") %>%
    # Let's identify those with increased evacuation above and below the median
    mutate(treat = if_else(evacuation_more > 0, 1, 0)) %>%
    left_join(by = c("csub"), y = read_rds("raw_data/ca_evac_match.rds")) %>%
    filter(weights > 0) %>%
    mutate(case = "Glass Fire",
           type = "Matched for\nEvacuation"),
  
  
  
  # So, from within this affected area, let's identify some communities which experienced less rainfall than others.
  read_rds("raw_data/la_dataset.rds") %>%
    # Since each rainfall stat is lagged by 2 weeks,
    # we must zoom into TWO WEEKS AFTER THE WEEK OF THE DISASTER,
    # switching from the start of the disaster in "2020-10-28" to "2020-11-11"
    # Zooming into the  week of the disaster,
    filter(week == "2020-11-11") %>%
    # Let's identify those with DECREASED evacuation above and below the median
    mutate(treat = if_else(evacuation_less > 0, 1, 0)) %>%
    left_join(by = c("csub"), y = read_rds("raw_data/la_shelter_match.rds")) %>%
    filter(weights > 0) %>%
    mutate(case = "Hurricane Zeta",
           type = "Matched for\nSheltering\nin Place"),
  
  
  # So, from within this affected area, let's identify some communities which experienced less rainfall than others.
  read_rds("raw_data/ca_dataset.rds") %>%
    # Zoom into the week of the disaster,
    # Since active_fires is lagged by 2 weeks, we need to zoom into 2 weeks AFTER
    # the date of the disaster ("2020-09-28"), switching to "2020-10-12"
    filter(week == "2020-10-12") %>%
    # Let's identify those with DECREASED evacuation above and below the median
    mutate(treat = if_else(evacuation_less > 0, 1, 0)) %>%
    left_join(by = c("csub"), y = read_rds("raw_data/ca_shelter_match.rds")) %>%
    filter(weights > 0) %>%
    mutate(case = "Glass Fire",
           type = "Matched for\nSheltering\nin Place")
) %>%
  select(case, type, csub, weights, treat, pop, pop_density, social_capital, svi) %>%
  # join in names
  left_join(by = c("csub" = "geoid"), 
            y = read_rds("shapes/csub.rds") %>%
              as.data.frame() %>%
              select(geoid, name)) %>%
  # join in county name
  mutate(fips = str_sub(csub, 1,5)) %>%
  left_join(by = c("fips" = "geoid"), y = read_rds("shapes/counties.rds") %>% 
              as.data.frame() %>%
              select(geoid, county = name)) %>%
  # Some county subdivisions have weird names (eg. just a number)
  # Just assign them the name of their county, followed by their number
  mutate(name = if_else(!is.na(as.numeric(name)) | 
                          name %in% c("A", "B", "1A", "1B"), 
                        paste(county, " ", name, sep = ""), name)) %>%
  mutate(case = case %>% recode_factor(
    "Hurricane Zeta" = "Hurricane\nZeta\n(Southeastern\nLouisiana)",
    "Glass Fire" = "Glass\nFire\n(Central\nCalifornia)")) %>%
  mutate(treat = treat %>% recode_factor(
    "1" = "Treatment Group",
    "0" = "Matched Control Group")) %>%
  arrange(county, name) %>%
  mutate(order = 1:n())

g1 <- dat %>% 
  filter(case == "Hurricane\nZeta\n(Southeastern\nLouisiana)") %>%
  ggplot(mapping = aes(x = type, y = reorder(name, -order), label = name, fill = treat)) +
  geom_tile(color = "white", size = 0.5) +
  facet_grid(county~type, scales = "free", space = "free") +
      theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.grid = element_blank()) +
  theme(strip.text.y = element_text(angle = 0),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
                plot.subtitle = element_text(hjust = 0.5),
        legend.position = "bottom") +
  labs(y = NULL,
       x = NULL, fill = "Group",
       subtitle = "Hurricane Zeta\n(Southeastern Louisiana)") +
  scale_fill_manual(values = c("#785EF0", "#FFB000")) 

g2 <- dat %>% 
  filter(case != "Hurricane\nZeta\n(Southeastern\nLouisiana)") %>%
  ggplot(mapping = aes(x = type, y = reorder(name, -order), label = name, fill = treat)) +
  geom_tile(color = "white", size = 0.5) +
  facet_grid(county~type, scales = "free", space = "free") +
    theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.grid = element_blank()) +
  theme(strip.text.y = element_text(angle = 0),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "bottom") +
  labs(y = NULL,
       x = NULL, fill = "Group",
              subtitle = "Glass Fire\n(Central California)") +
  scale_fill_manual(values = c("#785EF0", "#FFB000")) 

ggpubr::ggarrange(g1,g2,ncol = 2,common.legend = TRUE, legend = "bottom") +
  theme(legend.text = element_text(size = 18),
        legend.title = element_text(size = 18)) +
  ggsave("viz/matched_table.png", dpi = 300, width = 13, height = 15)
```



# 8. Validation Tests

Our paper at times uses test positivity rates, and at other times, uses case rate data, because test positivity rates were not available. Are these comparable, particularly in our states of interest?


## Get Data

```{r, eval = FALSE}
library(tidyverse)

# Downloading from the John Hopkins COVID-19 tracking project's github page:
#https://github.com/govex/COVID-19/tree/master/data_tables/testing_data

# Download overall time series data
read_csv("https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/testing_data/time_series_covid19_US.csv") %>%
  write_csv("raw_data/covariates/testing_state.csv")

read_csv("raw_data/covariates/testing_state.csv") %>% 
  select(date, state, tests_combined_total) %>%
  mutate(date = lubridate::mdy(date)) %>%
  saveRDS("raw_data/covariates/testing_state.rds")


# Download county time series data
read_csv("https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/testing_data/county_time_series_covid19_US.csv") %>%
  write_csv("raw_data/covariates/testing_county.csv")

read_csv("raw_data/covariates/testing_county.csv") %>% 
  select(fips, date, state = province_state, tests_combined_total) %>%
  saveRDS("raw_data/covariates/testing_county.rds")


download.file("https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/JHU_USCountymap/df_Counties2020.csv", 
              destfile = "raw_data/covariates/cases_county.csv")

read_csv("raw_data/covariates/cases_county.csv") %>%
  dplyr::select(fips = FIPS, date = dt, cases_new = NewCases, pop = Population) %>%
  # In some cases, there are 'negative case rates', 
  # This is a data error. 
  # I've reset these to 0, so as to keep the information,
  # that there were no new cases,
  # but so as not to include counter-intuitive negatives
  # which don't really make sense.
  mutate(cases_new = if_else(cases_new < 0, 0, cases_new)) %>%
  # Calculate cases per 100,000 residents
  mutate(case_rate = cases_new / pop * 100000 ) %>%
  # Filter just to populated counties
  filter(pop > 0) %>%
  saveRDS("raw_data/covariates/case_rates_county.rds")


# Aggregate to state level
read_rds("raw_data/covariates/case_rates_county.rds") %>% 
  mutate(state = str_sub(fips, 1, 2)) %>%
  group_by(state, date) %>%
  summarize(cases_new = sum(cases_new, na.rm = TRUE),
            pop = sum(pop, na.rm = TRUE),
            case_rate = cases_new / pop) %>%
  saveRDS("raw_data/covariates/case_rates_state.rds")
```




## State-level

```{r}
library(tidyverse)
```

```{r}
# Get state fips codes
mystates <- tigris::fips_codes %>%
  select(state, state_code, state_name) %>%
  distinct()

states <- read_rds("raw_data/covariates/case_rates_state.rds") %>%
  filter(state != "00") %>%
  rename(state_code = state) %>%
  left_join(by = "state_code", y = mystates)  %>%
  # Join in  state testing levels
  left_join(by = c("date", "state" = "state"),
            y = read_rds("raw_data/covariates/testing_state.rds")) %>%
  # Calculate new tests conducted each day
  arrange(state_code, date) %>%
  group_by(state_code) %>%
  mutate(tests = tests_combined_total - lag(tests_combined_total, 1)) %>%
  # IF they administer a negative number of tests in a day,
  # we'll call that zero
  mutate(tests = if_else(tests < 0, 0, tests)) %>%
  ungroup() %>%
  # Zoom into just days where we can compare all three records
  mutate(positivity_rate = cases_new / tests) %>%
  filter(!is.na(tests), !is.nan(tests), !is.infinite(tests),
         !is.na(case_rate),!is.nan(case_rate), !is.infinite(case_rate),  
         !is.na(pop)) %>%
  filter(!is.infinite(positivity_rate), !is.na(positivity_rate)) %>%
  # Zoom into days with any cases and any positivity rate
  # that way we're sure they were actually testing
  filter(case_rate > 0, positivity_rate > 0) %>%
  ungroup() %>%
  # Calculate logged versions
  mutate(log_positivity_rate = log(positivity_rate),
         log_case_rate = log(case_rate)) %>%
  filter(date > "2020-03-01") %>%
  mutate(sclog_positivity_rate = scale(log_positivity_rate),
         sclog_case_rate = scale(log_case_rate)) %>%
  # Keep only entries that could be compared
  # filter(!is.nan(log_case_rate), !is.nan(log_positivity_rate)) %>%
  mutate(group = if_else(state %in% c("CA", "LA"), state, "Other")) 


avg <- states %>%
  # Calculate for each week the average
  group_by(state, date = lubridate::ceiling_date(date, unit = "week", 
                                                 week_start = 1)) %>%
  summarize(case_rate = mean(case_rate, na.rm = TRUE),
            positivity_rate = mean(positivity_rate, na.rm = TRUE)) %>%
  ungroup() %>%
  # Calculate logged versions
  mutate(log_positivity_rate = log(positivity_rate),
         log_case_rate = log(case_rate)) %>%
  filter(date > "2020-03-01") %>%
  mutate(sclog_positivity_rate = scale(log_positivity_rate),
         sclog_case_rate = scale(log_case_rate)) %>%
  # Keep only entries that could be compared
  # filter(!is.nan(log_case_rate), !is.nan(log_positivity_rate)) %>%
  mutate(group = if_else(state %in% c("CA", "LA"), state, "Other")) 



dat <- bind_rows(
  states %>%
    group_by(date) %>%
    summarize(cor = cor(log_positivity_rate, log_case_rate, 
                        use = "pairwise.complete.obs")) %>%
    mutate(type = "Daily"),
  avg %>%
    group_by(date) %>%
    summarize(cor = cor(log_positivity_rate, log_case_rate, 
                        use = "pairwise.complete.obs")) %>%
    mutate(type = "7-Day Avg.")
)


dat <- bind_rows(
  dat %>% mutate(class = "outline"),
  dat %>% mutate(class = "main")) %>%
  mutate(group = paste(class, type))

# Get time period
mytime <- data.frame(
    label = "Sample Period\n(Aug. 17 - Dec. 23, 2020)",  
    from = as.Date("2020-08-17"),
    to = as.Date("2020-12-23"))

mycases <- data.frame(
  label = c("Hurricane\nZeta", "Glass\nFire"),
  date = c("2020-10-28", "2020-09-27") %>% as.Date(),
  version = c("Disaster Date")) %>%
  mutate(group =  paste(label, version))

myback <- expand_grid(
  cor = seq(from = -1, to = 1, length.out = 100),
  date = seq(from = dat$date %>% min(),
             to = dat$date %>% max(),
             length.out = 100))

mycor <- avg %>%
  group_by(date) %>%
  summarize(cor = cor(log_positivity_rate, log_case_rate, 
                      use = "pairwise.complete.obs")) %>%
  filter(date == "2020-09-28" | date == "2020-10-26") %>%
   mutate(label = round(cor, 2) %>% paste())




mybars <- data.frame(
  label = c("Strong", "Moderate", "Weak", "Poor"),
  ymax = c(1, 0.68, 0.34, 0),
  ymin = c(0.65, 0.31, -0.01, -1),
  y = c(0.85, 0.55, 0.15, -0.25))

```


```{r}
g1 <- ggplot() +
  geom_tile(data = myback,
            mapping = aes(x = date,y = cor,
                          fill = cor), alpha = 0.5) +
  scale_fill_gradient2(low = "#DC267F", 
                        mid = "white",midpoint = 0,
                        high = "#648FFF") +
  geom_hline(yintercept = 0, color = "grey", 
             linetype = "solid", size=  1.25) +
  geom_rect(data = mytime, 
            mapping = aes(
              linetype = "Study Period",
              xmin = from, xmax = to, 
              ymin = -1, ymax = 1),
            alpha = 0.25) +
  geom_vline(data = mycases, 
             mapping = aes(xintercept = date, 
                           group = group,
                           linetype = "Disaster")) +
  geom_text(data = mycases,
            mapping = aes(x = date, y = c(-0.25, -0.5),
                          label = label), size = 4, 
            hjust = c(0, 1), nudge_x = c(5, -5)) +
  geom_line(data = dat %>%
              filter(class == "outline"),
            mapping = aes(x = date, y = cor,
                          size = group,
                          alpha = group,
                          group = type),
            linetype = "solid", color = "white") +
  geom_line(data = dat %>%
              filter(class == "main"),
            mapping = aes(x = date, y = cor,
                          size = group,
                          alpha = group,
                          color = type, 
                          group = type),
            linetype = "solid") +
  geom_point(data = mycor,
             mapping = aes(x = date, y = cor,
                           label = cor),
             size = 9, color = "#785EF0") +
  geom_point(data = mycor,
             mapping = aes(x = date, y = cor,
                           label = cor),
             size = 7.5,
             color = "white") +
  geom_text(data = mycor,
             mapping = aes(x = date, y = cor,
                           label = label),
             size = 2.75) +
  
  labs(x = "Days",
       y = "Correlation (Pearson's r)\nbetween Positivity Rate and Case Rate",
       subtitle = "Strong Positive Relationship between\nTest Positivity Rates and Case Rates during Study Period (among US States)",
       color = "Measure", linetype = NULL) +
  theme_bw(base_size = 14) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.subtitle = element_text(hjust = 0.5)) +
  guides(fill = "none", size = "none", alpha = "none") +
  scale_size_manual(values = c(1.5, 0.5, 2, 1)) +
    scale_alpha_manual(values = c(1, 0.5, 1, 0.5)) +
  scale_color_manual(
    values = c("#785EF0", "black"),
                     guide = guide_legend(
                       order = 1,
                       override.aes = list(
                         size = 2))) +
  scale_linetype_manual(values = c("dashed", "blank"),
                        guide = guide_legend(
                          order = 2, override.aes = list(
                            fill = c("white", "darkgrey"),
                            size = 0.75,
                            alpha = c(1, 0.90)))) +
  theme(legend.spacing = unit(0, "cm"),
        legend.position = "bottom") +
  geom_errorbar(data = mybars,
                mapping = aes(x = as.Date("2021-04-10"),
                              group = label,
                              ymin = ymin, ymax = ymax),
                width = 10, size = 1.25) +
  geom_label(data = mybars,
            mapping = aes(x = as.Date("2021-04-5"),
                          y = y, fill = y,
                          group = label,
                          label = label), hjust = 1,
            size = 3, outline.color = NA, alpha = 0.75) 
ggsave(g1, filename = "viz/test_cor.png", dpi = 500, width = 8, height = 6)
#
#* Rates log-transformed because right-skewed.
# Range provides rough benchmarks against which we can judge the correlation.
# If strong or moderate, then we can largely trust that results which are true for one are true for the other.
# If poor, then we really shouldn't trust those

rm(list = ls())
```

## CA / LA comparison

```{r}
# How far is each score from the national mean
overall <- bind_rows(
  
  avg %>%
    filter(state == "CA") %>%
    group_by(date) %>%
    summarize(positivity_rate = mean(sclog_positivity_rate, na.rm = TRUE),
              case_rate = mean(sclog_case_rate, na.rm = TRUE)) %>%
    mutate(state = "California"),
  
  avg %>%
    filter(state == "LA") %>%
    group_by(date) %>%
    summarize(positivity_rate = mean(sclog_positivity_rate, na.rm = TRUE),
              case_rate = mean(sclog_case_rate, na.rm = TRUE)) %>%
    mutate(state = "Louisiana"),
  
  
  avg %>%
    group_by(date) %>%
    summarize(positivity_rate = mean(sclog_positivity_rate, na.rm = TRUE),
              case_rate = mean(sclog_case_rate, na.rm = TRUE)) %>%
    mutate(state = "United States Overall")) 



mylines <- overall %>%
  pivot_longer(cols = c(positivity_rate, case_rate),
               names_to = "variable", values_to = "value") %>%
  mutate(group = paste(variable, state))


mycor <- overall %>%
  # Get correlation during study period
  filter(date >= mytime$from, date <= mytime$to) %>%
  group_by(state) %>%
  summarize(cor = cor(positivity_rate, case_rate),
            label = round(cor, 2) %>% paste())


points <- bind_rows(
  
  states %>%
    filter(state == "CA") %>%
    group_by(date) %>%
    summarize(positivity_rate = mean(sclog_positivity_rate, na.rm = TRUE),
              case_rate = mean(sclog_case_rate, na.rm = TRUE)) %>%
    mutate(state = "California"),
  
  states %>%
    filter(state == "LA") %>%
    group_by(date) %>%
    summarize(positivity_rate = mean(sclog_positivity_rate, na.rm = TRUE),
              case_rate = mean(sclog_case_rate, na.rm = TRUE)) %>%
    mutate(state = "Louisiana"),
  
  
  states %>%
    group_by(date) %>%
    summarize(positivity_rate = mean(sclog_positivity_rate, na.rm = TRUE),
              case_rate = mean(sclog_case_rate, na.rm = TRUE)) %>%
    mutate(state = "United States Overall")) %>%
    pivot_longer(cols = c(positivity_rate, case_rate),
               names_to = "variable", values_to = "value") %>%
  mutate(group = paste(variable, state))

mycases <- data.frame(
  label = c("Hurricane\nZeta", "Glass\nFire"),
  date = c("2020-10-28", "2020-09-27") %>% as.Date(),
  version = c("Disaster Date")) %>%
  mutate(group =  paste(label, version)) %>%
  mutate(state = c("Louisiana", "California"))


g1 <- ggplot() +
  geom_rect(data = mytime, 
            mapping = aes(
              fill = "Study Period",
              xmin = from, xmax = to, 
              ymin = -Inf, ymax = Inf),
            alpha = 0.25, color = NA) +
  geom_vline(data = mycases %>%
  mutate(state = c("Louisiana", "California")), 
             mapping = aes(xintercept = date, 
                           group = group,
                           color = "Disaster"), linetype = "solid") +
  geom_jitter(data = points,
              mapping = aes(x = date, y = value,
                            linetype = variable, color = variable, group = group),
              alpha = 0.1) +
  geom_line(data = mylines,
            mapping = aes(x = date, y = value, 
                          linetype = variable,
                          color = variable, group = group),
            size = 1) +
  geom_label(data = mycor,
            mapping = aes(x = as.Date("2020-10-01"),
                          y = -2,
                          label = paste("Study Period\nCorrelation:\nr = ", label, sep = ""))) +
  geom_text(data = mycases, 
            mapping = aes(x = date, y = -3.5, hjust = c(1, 0), nudge_x = c(-30, 30),
                          label = label), size = 4) +
  facet_wrap(~state, ncol = 3) +
  theme_bw(base_size = 14) +
  theme(legend.position = "bottom",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.key=element_blank(),
        plot.subtitle = element_text(hjust = 0.5)) +
  labs(y = "Logged Rates\n(Standardized as Z-scores)",
       x = "Date") +
  scale_x_date(
    breaks = c("2020-06-01", "2020-10-01", "2021-02-01") %>% as.Date(),
    labels = c("Jun 2020", "Oct 2020", "Feb 2021")) +

  scale_color_manual(
    breaks = c("case_rate", "positivity_rate", "Disaster"),
    labels = c("Case Rate", "Positivity Rate", "Disaster"),
    values = c("#DC267F", "#648FFF", "black"), guide = guide_legend(order = 1)) +
  scale_fill_manual(values = "darkgrey", guide = guide_legend(order = 2)) +
  guides(linetype = "none") +
  labs(fill = NULL, color = "Measure",
       subtitle = "Test Positivity Rates and Case Rates\nclosely track in States of Interest during Study Period")

ggsave(g1, filename = "viz/test_cor_states.png", dpi = 500, width = 9.5, height = 6)

rm(list= ls())
```


## Testing Capacity

```{r}
# Get state fips codes
mystates <- tigris::fips_codes %>%
  select(state, state_code, state_name) %>%
  distinct()

states <- read_rds("raw_data/covariates/case_rates_state.rds") %>%
  filter(state != "00") %>%
  rename(state_code = state) %>%
  left_join(by = "state_code", y = mystates)  %>%
  # Join in  state testing levels
  left_join(by = c("date", "state" = "state"),
            y = read_rds("raw_data/covariates/testing_state.rds")) %>%
  # Calculate new tests conducted each day
  arrange(state_code, date) %>%
  group_by(state_code) %>%
  mutate(tests = tests_combined_total - lag(tests_combined_total, 1)) %>%
  # IF they administer a negative number of tests in a day,
  # we'll call that zero
  mutate(tests = if_else(tests < 0, 0, tests)) %>%
  filter(!is.na(tests)) %>%
  mutate(geo = case_when(
                         state == "CA" ~ "California",
                         state == "LA" ~ "Louisiana",
                         TRUE ~ "Other States")) %>%
  # Remove times where it says 0 tests were conducted
  # this basically means data wasn't collected on that date
  filter(tests != 0) %>%
  filter(date > "2020-03-01") %>%
  mutate(testing_rate = tests / pop * 100000)

# Get time period
mytime <- data.frame(
    label = "Sample Period\n(Aug. 17 - Dec. 23, 2020)",  
    from = as.Date("2020-08-17"),
    to = as.Date("2020-12-23"))

mycases <- data.frame(
  label = c("Hurricane\nZeta", "Glass\nFire"),
  date = c("2020-10-28", "2020-09-27") %>% as.Date(),
  version = c("Disaster Date")) %>%
  mutate(group =  paste(label, version)) %>%
  mutate(geo = c("Louisiana", "California"))

g1 <- ggplot() +
  geom_rect(data = mytime, 
            mapping = aes(
              xmin = from, xmax = to, 
              ymin = 0.1, 
              ymax = 20792,
              fill = "Study Period"),
            alpha = 0.5, color = NA) +
  scale_fill_manual(values = "grey") +
  geom_vline(data = mycases, 
             mapping = aes(xintercept = date, 
                           group = group,
                           color = "Disaster"), linetype = "solid") +
  geom_text(data = mycases, 
            mapping = aes(x = date, y = 1,
                          label = label), 
             hjust = c(1, 0), 
                          nudge_x = c(-10, 10),
            size = 4) +
  # Specify theme
  theme_bw(base_size = 14) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.subtitle = element_text(hjust = 0.5)) +
  theme(legend.position = "bottom") +
  # Add lines
  geom_line(data = states, 
            mapping = aes(x = date, y = testing_rate, group = state,
                          color = "Daily Rate"),
            alpha = 0.5) +
  scale_y_log10(limits = c(0.1, 20792)) +
  scale_color_manual(
    breaks = c("Daily Rate", "Loess-smoothed Curve", "Disaster"),
    values = c("#785EF0", "black", "darkgrey")) +
     scale_x_date(
       # zoom into window 60 days around the study period
       limits = c(as.Date(mytime$from) - 90, 
                  as.Date(mytime$to) + 90), 
    breaks = c("2020-06-01", "2020-10-01", "2021-02-01") %>% as.Date(),
    labels = c("Jun 2020", "Oct 2020", "Feb 2021")) +

  # Add loess smoothed lines
  geom_smooth(data = states, 
            mapping = aes(x = date, y = testing_rate, group = geo,
                          color = "Loess-smoothed Curve"),
            se = FALSE) +

  facet_wrap(~geo) +
  labs(x = "Date", 
       y = "Tests per 100,000 residents",
       subtitle = "Testing Capacity increased, not decreased, during Study Period",
       color = "Measure", fill = NULL) +
  guides(color = guide_legend(order = 1),
         fill = guide_legend(order = 2))  

ggsave(g1, filename = "viz/test_capacity.png", dpi = 500, width = 8, height = 5)
```


## Temperature

It is not possible to control for temperature and humidity in most models.
Here, we justify why the current specification is sufficient without it.

```{r}

dat <- bind_rows(
  read_rds("raw_data/la_dataset.rds") %>%
     # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("positivity_rate")), 
            funs(if_else(
    condition = . == 0,
    true = sort(unique(.))[2] / 2,
    false = .))) %>%
    select(csub, week, rate = positivity_rate, weathertrans, humidity_b, tmean) %>%
    pivot_longer(cols = c(weathertrans:tmean),
                 names_to = "variable", values_to = "value") %>%
    mutate(type = "Louisiana"),
  
  read_rds("raw_data/ca_dataset.rds") %>%
     # Fill in zeros for positivity rate with the next smallest value, divided by half
  # this is a good way to deal with a 0 rate, 
  # which is probably not accurate - they probably have COVID,
  # just low levels of it.
  mutate_at(vars(contains("case_rate")), 
            funs(if_else(
    condition = . == 0,
    true = sort(unique(.))[2] / 2,
    false = .))) %>%
    select(csub, week, rate = case_rate, weathertrans, humidity_b, tmean) %>%
    pivot_longer(cols = c(weathertrans:tmean),
                 names_to = "variable", values_to = "value") %>%
    mutate(type = "California"))  %>%
    mutate(variable = variable %>% recode_factor(
    "weathertrans" = "Climate\nZ-Score",
    "humidity_b" = "Relative\nHumidity",
    "tmean" = "Mean\nTemperature")) 

  #group_by(type, variable) %>%
  #mutate(value = scale(value))

dat %>%
  mutate(rate = log(rate)) %>%
  group_by(type, variable) %>%
  summarize(cor = cor(value, as.numeric(week), use = "pairwise.complete.obs"))

# Using fixed effects, you can explain....
mystats <- dat %>%
  mutate(group = paste(type, variable, sep = " - ")) %>%
  split(.$group) %>%
  map_dfr(~lm(formula = value ~ factor(week), data = .) %>%
            broom::glance(), .id = "group") %>%
  separate(col = group, into = c("type", "variable"), sep = " [-] ") %>%
  left_join(by = c("variable"),
            y= dat %>%
  group_by(variable) %>%
  summarize(mean = quantile(value, probs = 0.10, na.rm = TRUE)))


mymodels <- dat %>%
  mutate(group = paste(type, variable, sep = " - ")) %>%
  split(.$group) %>%
  map_dfr(~lm(formula = value ~ factor(week), data = .) %>%
        with(data.frame(predicted = .$fitted.values,
                            week = model$`factor(week)`)), .id = "group") %>%
  separate(col = group, into = c("type", "variable"), sep = " [-] ")

g1 <- ggplot() +
  geom_jitter(data = dat, mapping = aes(x = week, y = value, 
                                        fill = "City-Week Observation"),
              alpha = 0.25, shape = 21, color = "white") +
  geom_line(data = mymodels,
            mapping = aes(x = as.Date(week), y= predicted, 
                          color = "Predicted by Week",
                          group = variable),
            size = 2, alpha = 0.75, color = "white") +
  geom_line(data = mymodels,
            mapping = aes(x = as.Date(week), y= predicted, 
                          color = "Predicted by Week",
                          group = variable), size = 1.25, alpha = 0.75) +
  geom_smooth(data = dat, mapping = aes(x = week, y = value, 
                                        color = "Loess-Smoothed Curve"),
              method = "loess", alpha = 0.5) +
  
  geom_label(data = mystats,
            mapping = aes(x = as.Date("2020-10-01"),
                          y = mean,
                          label = paste("R2 = ", round(r.squared, 2), sep = "")),
            outline.color = NA, fill = "white", alpha = 0.75) +
  facet_grid(variable~type, scales = "free") +
  scale_color_manual(values = c("black", "darkgrey")) +
  scale_fill_manual(values = "#DC267F") +
  theme_bw(base_size = 14) +
  theme(panel.grid = element_blank(),
        strip.text.y = element_text(hjust = 0.5, angle = 0),
        legend.position = "bottom",
        plot.subtitle = element_text(hjust = 0.5)) +
  labs(x = "Date", y = "Climate Measure",
       subtitle = "Weekly Fixed Effects Closely Predict Climatic Controls",
       fill = NULL, color = NULL) +
  guides(fill = guide_legend(override.aes = list(size = 5)),
         color = guide_legend(override.aes = list(fill = NA)))


ggsave(g1, filename = "viz/test_climatic_controls.png", dpi = 500, width = 8, height = 6)
```

# 9. Table

## Extractor Function

```{r}
library(xml2)
library(rvest)
library(tidyverse)
library(tibble)
library(magrittr)

# Let's write a function to extract the data from
# a printed Texreg HTML file
get_table = function(myfile){
  
  # Read the html code from the original file
  mypanel <- read_html(paste("viz/", myfile, sep = ""))
  
  # Extract the top part of the table (the model names and headers)
  mytop <- mypanel %>%
    html_node("body") %>%
    html_node("table") %>%
    html_node("thead") %>% 
    html_table()
  
  # Bind the model names and headers into a data.frame
  myheader <- bind_cols(
    label = c("variable", 
              "model1", "model2", "model3", 
              "model4", "model5", "model6"),
    mytop %>% names() %>% data.frame(header = .) %>% na_if(""),
    mytop %>% unname() %>% unlist() %>% data.frame(model = .))
  
  
  # Extract the variables and beta coefficients
  mybeta <- mypanel %>%
    html_node("body") %>%
    html_node("table") %>%
    html_node("tbody") %>%
    html_table() %>%
    magrittr::set_colnames(value = c("predictor", 
                                     "model1", "model2", "model3", 
                                     "model4", "model5", "model6")) %>%
    # Pivot longer
    pivot_longer(cols = c(model1:model6),
               names_to = "label", values_to = "beta")
  
  mybeta %>%
    left_join(by = "label", y = myheader) %>%
    select(header, model, label, predictor, beta) %>%
    return()
}

# Now for each TEXREG HTML Table file,
mydat <- data.frame(
  file = c("table_1.html", 
           "table_1_disaster.html",
           "table_1_evac.html", "table_1_shelter.html",
           "table_2_disaster.html", 
           "table_2_evac.html", "table_2_shelter.html")) %>%
  split(.$file) %>%
  # Extract the table into a data.frame
  map_dfr(~get_table(.$file), .id = "file") %>%
  mutate(beta = na_if(beta, "")) %>%
  # Filter out variable group labels
  filter(!predictor %in% c(
    "Evacuation (per 1000 residents)",
    "Time Variant Controls",
    "Social Capital Indices",
    "Social Vulnerability Indices",
    "Health Care & Quality",
    "Governance")) %>%
  # Simplify model headers 
  mutate(case = str_extract(header, pattern = "Hurricane Zeta|Glass Fire"),
         outcome = str_extract(header, pattern = "Test Positivity Rate|Case Rate")) %>%
  mutate(type = str_extract(model, pattern = ".*Movement"),
         model = str_remove(model, pattern = type),
         effects = str_remove(model, pattern = ".*[(]") %>% str_remove("[)].*"),
         number = str_remove(model, pattern = ".*[)]"),
         model = str_extract(model, pattern = ".*[(]") %>% str_remove("[(]")) %>%
  mutate(file = file %>% dplyr::recode_factor(
    "table_1.html" = "Panel",
    "table_1_disaster.html" = "Matched for Disaster",
    "table_1_evac.html" = "Matched for Evacuation",
    "table_1_shelter.html" = "Matched for Sheltering",
    "table_2_disaster.html" = "DiD Matched for Disaster",
    "table_2_evac.html" = "DiD Matched for Evacuation",
    "table_2_shelter.html" = "DiD Matched for Sheltering")) %>%
  # Arrange as preferred
  select(method = file, case, outcome, number, model, type, effects, label, predictor, beta) %>%
  # remove any rows that don't have results,
  # these are just empty space
  filter(!is.na(beta))
```

## Build Table

```{r}

mytab <- bind_rows(
  # Get Effects of all IVs from Panel models, with varying levels of controls
  mydat %>%
    filter(method == "Panel") %>%
    filter(predictor %in% c(
      "Rainfall", "% Average Acres Burned",
      "Increased Mobility", 
      "Increased Mobility within Cities",
      "Increased Mobility between Cities",
      "Decreased Mobility", 
      "Decreased Mobility within Cities",
      "Decreased Mobility between Cities")) %>%
    mutate(predictor = if_else(predictor %in% c("Rainfall", "% Average Acres Burned"),
                               "Damage", predictor)),
  # Get Effect of Damage from Matched Models, with varying levels of controls
  mydat %>%
    filter(method == "Matched for Disaster") %>%
    filter(predictor %in% c("Rainfall", "% Average Acres Burned")) %>%
    mutate(predictor = "Damage"),
  
  # Get Effect of Evacuation from Matched Models, with varying levels of controls
  mydat %>%
    filter(method == "Matched for Evacuation") %>%
    filter(predictor %in% c("Increased Mobility", 
                            "Increased Mobility within Cities",
                            "Increased Mobility between Cities")),
  
  # Get Effect of Sheltering from Matched Models, with varying levels of controls
  mydat %>%
    filter(method == "Matched for Sheltering") %>%
    filter(predictor %in% c("Decreased Mobility", 
                            "Decreased Mobility within Cities",
                            "Decreased Mobility between Cities"))) %>%


  select(method, case, outcome, number, predictor, beta, number) %>%
  mutate(predictor = str_replace_all(predictor, pattern = " ", replacement = "<br>")) %>%
  mutate(method = str_replace_all(method, pattern = " ", replacement = "<br>")) %>%
   mutate(case = paste(case, "<br>(Logged ", outcome, ")", sep = "")) %>%
  mutate(estimate = str_remove(beta, "[(].*") %>% str_trim(side = "both") %>% as.numeric(),
         sig = str_extract(beta, pattern = "[*]+"),
         sig = if_else(is.na(sig), "", sig),
         alpha = if_else(sig %in% c("*", "**", "***"),
                                    "p < 0.05", "")) %>%
  mutate(method = if_else(str_detect(method, "Matched"), "Matched", "Panel"),
         method = factor(method, levels = c("Icon", "Effect", "Panel", "Matched"))) %>%
  group_by(case, method, predictor) %>%
  arrange(estimate) %>%
  mutate(order = 1:n()) %>%
  ungroup() %>%
  mutate(label = paste(estimate, sig, "<br>", number, sep = "")) %>%
  mutate(sorter = str_extract(number, "[-].*") %>% as.numeric(),
         sorter = if_else(abs(sorter) > 3, abs(sorter) - 3, abs(sorter)),
         sorter = factor(sorter)) %>%
   mutate(predictor = factor(
    predictor, levels = c("Damage",
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities")))
```

```{r}



library(fontawesome)
library(rsvg)
library(magick)


fa_png(name = "house-damage",
       fill = "black", fill_opacity = 0.5, 
       file = paste("viz/fa/", "house-damage", ".png", sep = ""))
fa_png(name = "running",
       fill = "#FE6100", fill_opacity = 0.5, 
       file = paste("viz/fa/", "running", ".png", sep = ""))
fa_png(name = "house-user",
       fill = "#785EF0", fill_opacity = 0.75, 
       file = paste("viz/fa/", "house-user", ".png", sep = ""))

fa_png(name = "arrow-up",
       fill = "#648FFF", fill_opacity = 0.5, width = 500, height = 1000,
       file = paste("viz/fa/", "arrow-up", ".png", sep = ""))
fa_png(name = "arrow-down",
       fill = "#DC267F", fill_opacity = 0.5, width = 500, height = 1000,
       file = paste("viz/fa/", "arrow-down", ".png", sep = ""))
fa_png(name = "question",
       fill = "grey", fill_opacity = 0.5, width = 500, height = 1000,
       file = paste("viz/fa/", "question", ".png", sep = ""))




myimages <- bind_rows(
  # California
  data.frame(
    icon = c(
      "viz/fa/house-damage.png",
      "viz/fa/house-user.png",
      "viz/fa/house-user.png",
      "viz/fa/house-user.png",
      "viz/fa/running.png",
      "viz/fa/running.png",
      "viz/fa/running.png"),
    predictor = c("Damage",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities", 
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities"),
    case = rep("Glass Fire<br>(Logged Case Rate)", 7),
    method = rep("Icon", 7)),
  
  data.frame(
    icon = c(
      "viz/fa/arrow-up.png",
      
      "viz/fa/arrow-down.png",
      "viz/fa/question.png",
      "viz/fa/arrow-down.png",
      
      "viz/fa/arrow-down.png",
      "viz/fa/question.png",
      "viz/fa/arrow-down.png"),
    predictor = c("Damage",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities", 
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities"),
    case = rep("Glass Fire<br>(Logged Case Rate)", 7),
    method = rep("Effect", 7)),
  
  # Hurricane Zeta
  data.frame(
     icon = c(
      "viz/fa/house-damage.png",
      "viz/fa/house-user.png",
      "viz/fa/house-user.png",
      "viz/fa/house-user.png","viz/fa/running.png",
      "viz/fa/running.png",
      "viz/fa/running.png"),
     predictor = c("Damage",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities", 
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities"),
    case = rep("Hurricane Zeta<br>(Logged Test Positivity Rate)", 7),
    method = rep("Icon", 7)),
  data.frame(
    icon = c(
      "viz/fa/question.png",
      "viz/fa/arrow-down.png",
      "viz/fa/arrow-up.png",
      "viz/fa/arrow-down.png",
      "viz/fa/question.png",
      "viz/fa/question.png",
      "viz/fa/question.png"),
    predictor = c("Damage",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities", 
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities"),
    case = rep("Hurricane Zeta<br>(Logged Test Positivity Rate)", 7),
    method = rep("Effect", 7))
) %>%
   mutate(method = factor(method, levels = c("Icon", "Effect", "Panel", "Matched"))) %>%
  mutate(predictor = factor(
    predictor, levels = c("Damage",
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities")))

```

```{r}
library(ggtext)
g1 <- mytab %>%
  ggplot(mapping = aes(y = sorter, x = method, #reorder(order, -order), 
                       label = label, 
                       alpha = alpha, 
                       fill = estimate)) +
  scale_fill_gradient2(low = "#DC267F", mid = "white", midpoint = 0,
                       high = "#648FFF", guide = guide_colorbar(barwidth = 8, order = 1)) +
  
  scale_alpha_manual(values = c(0.5, 1), guide = "none") +
  #geom_tile() +
  geom_richtext(color = "black") +
  facet_grid(predictor~case, scales = "free", space = "free") +
  theme_bw(base_size = 14) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_markdown(),
        strip.placement = "outside",
        strip.text.x = element_markdown(),
        plot.subtitle = element_text(hjust = 0.5),
        panel.border = element_rect(color = "gray"),
        strip.text.y = element_markdown(angle = 0),
        strip.text = element_markdown(angle = 0),
        plot.caption = element_text(hjust = 0.5)) +
  theme(legend.position = "bottom", legend.box = "horizontal") +
  labs(subtitle = "Effects on Logged COVID-19 Spread Rates by Model",
       x = NULL, y = NULL, alpha = "Significance", fill = "Standardized Effect (Beta)",
       caption = "Significant associations highlighted; insignificant associations transparent.") +
  geom_image(data = myimages %>%
               
               filter(method != "Icon"),
             mapping = aes(
               x = method,
               y = rep( c(2, 1, 1, 1, 1.5, 1, 1), 2) %>% as.numeric(), 
               image= icon, 
               label = "Nothing", fill = 0.25, alpha = 1), 
             label = "",
             size = 0.3, 
             alpha = 0.5, 
             fill = "blue")
  

g <- ggplot_gtable(ggplot_build(g1))

stripr <- which(grepl('strip-r', g$layout$name))

fills <- c("grey",
           "#F6BB97", "#F6BB97", "#F6BB97",
           "#A599DF", "#A599DF", "#A599DF")

k <- 1
for (i in stripr) {
  j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder))
  g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
  k <- k+1
}

g1 <- ggpubr::as_ggplot(g)
```


```{r}
ggsave(g1, filename = "viz/summary_table.png", dpi = 500, width = 8, height = 9)

```

```{r}

mysub <- myimages %>%
  filter(method == "Icon") %>%
  filter(predictor %in% c("Damage", 
                          "Increased<br>Mobility", 
                          "Decreased<br>Mobility")) %>%
  filter(case == "Glass Fire<br>(Logged Case Rate)") %>%
  mutate(case = "Concept") %>%
  mutate(label = c("Disaster Damage", "Sheltering-in-Place", "Evacuation")) %>%
  mutate(ymin = -1,
         ymax = 1,
         y = 0,
         xmin = -1,
         xmax = 1)


g2 <- ggplot() +
  geom_rect(data = mysub,
            mapping = aes(xmin = -1,
                          xmax = 1,
                          ymin = -1,
                          ymax = 1),
            fill = "white") +
  geom_image(data = mysub,
             mapping = aes(
               x = 0,
               y = y, 
               image= icon, 
               label = "Nothing", fill = 0.25, alpha = 0.5), 
             label = "",
             size = 0.75, 
             alpha = 0.5, 
             fill = "blue")  +
  geom_richtext(data = mysub,
            mapping = aes(x = 0,
                          y = 0,
                          label = label),
            fill = NA,
            size = 5.5, label.color = NA,fontface = "bold",
            nudge_y = -0.9) +
    facet_grid(predictor~case, scales = "free", space = "free") +
  theme_bw(base_size = 14) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_markdown(),
        strip.text.x = element_markdown(),
        plot.subtitle = element_text(hjust = 0.5),
        panel.border = element_rect(color = "gray"),
        strip.text.y = element_markdown(angle = 0),
        plot.caption = element_text(hjust = 0.5)) +
  theme(legend.position = "bottom", legend.box = "horizontal",
        strip.background.y = element_blank(),
        strip.background.x = element_blank(),
        strip.text.x = element_blank(),
        panel.border = element_blank(),
        strip.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.spacing = unit(0.5, "cm"),
        plot.margin = margin(0,0,0,0,"cm")) +
  scale_y_continuous(breaks = c(-1, 0, 1)) +
  labs(x = NULL, y = NULL, subtitle = "",
       caption = "\n\n\n\n\n")

myplot <- ggpubr::ggarrange(g2, g1, widths = c(0.25, 0.75), ncol = 2, 
                            common.legend = TRUE, legend = "bottom") +
  ggpubr::bgcolor("white") +
  ggpubr::border(color = "white")

ggsave(myplot, filename = "viz/summary_table_images.png", 
       dpi = 500, width = 10, height = 10)
```



## Compare Lags

```{r}
# Now for each TEXREG HTML Table file,
mydat <- data.frame(
  file = c("table_1.html", 
           "table_1_disaster.html", 
           "table_1_evac.html",
           "table_1_shelter.html",
           "table_2_disaster.html",
           "table_2_evac.html", 
           "table_2_shelter.html",
           
           
           "table_1_1wklater.html", 
           "table_1_disaster_1wklater.html", 
           "table_1_evac_1wklater.html",
           "table_1_shelter_1wklater.html",
           "table_2_disaster_1wklater.html",
           "table_2_evac_1wklater.html", 
           "table_2_shelter_1wklater.html",
           
           "table_1_3wklater.html", 
           "table_1_disaster_3wklater.html", 
           "table_1_evac_3wklater.html",
           "table_1_shelter_3wklater.html",
           "table_2_disaster_3wklater.html",
           "table_2_evac_3wklater.html", 
           "table_2_shelter_3wklater.html",
           
           "table_1_4wklater.html", 
           "table_1_disaster_4wklater.html", 
           "table_1_evac_4wklater.html",
           "table_1_shelter_4wklater.html",
           "table_2_disaster_4wklater.html",
           "table_2_evac_4wklater.html", 
           "table_2_shelter_4wklater.html",
           
           "table_1_5wklater.html", 
           "table_1_disaster_5wklater.html", 
           "table_1_evac_5wklater.html",
           "table_1_shelter_5wklater.html",
           "table_2_disaster_5wklater.html",
           "table_2_evac_5wklater.html", 
           "table_2_shelter_5wklater.html")) %>%
  split(.$file) %>%
  # Extract the table into a data.frame
  map_dfr(~get_table(.$file), .id = "file") %>%
  mutate(beta = na_if(beta, "")) %>%
  # Filter out variable group labels
  filter(!predictor %in% c(
    "Evacuation (per 1000 residents)",
    "Time Variant Controls",
    "Social Capital Indices",
    "Social Vulnerability Indices",
    "Health Care & Quality",
    "Governance")) %>%
  # Simplify model headers 
  mutate(case = str_extract(header, pattern = "Hurricane Zeta|Glass Fire"),
         outcome = str_extract(header, pattern = "Test Positivity Rate|Case Rate")) %>%
  mutate(type = str_extract(model, pattern = ".*Movement"),
         model = str_remove(model, pattern = type),
         effects = str_remove(model, pattern = ".*[(]") %>% str_remove("[)].*"),
         #number = str_remove(model, pattern = ".*[)]"),
         model = str_extract(model, pattern = ".*[(]") %>% str_remove("[(]")) %>%
  mutate(lag = str_extract(file, pattern = "[_][1-5]{1}wklater.html|.html"),
         file = str_remove(file, pattern = lag),
         table = file %>% dplyr::recode_factor(
           "table_1" = "A1",
           "table_1_disaster" = "A2",
           "table_1_evac" = "A3",
           "table_1_shelter" = "A4",
           "table_2_disaster" = "A5",
           "table_2_evac" = "A6",
           "table_2_shelter" = "A7"),
         file = file %>% dplyr::recode_factor(
           "table_1" = "Panel",
           "table_1_disaster" = "Matched for Disaster",
           "table_1_evac" = "Matched for Evacuation",
           "table_1_shelter" = "Matched for Sheltering",
           "table_2_disaster" = "DiD Matched for Disaster",
           "table_2_evac" = "DiD Matched for Evacuation",
           "table_2_shelter" = "DiD Matched for Sheltering"),
         lag = lag %>% recode_factor(
           ".html" = "2 weeks",
           "_1wklater.html" = "1 week",
           "_3wklater.html" = "3 weeks",
           "_4wklater.html" = "4 weeks",
           "_5wklater.html" = "5 weeks")) %>%
  mutate(number = paste("Model ", table, "-", str_extract(label, "[1-6]{1}"), sep = "")) %>%
  # Arrange as preferred
  select(method = file, lag, table, case, outcome, number, model, type, effects, label, predictor, beta) %>%
  # remove any rows that don't have results,
  # these are just empty space
  filter(!is.na(beta))

```


```{r}
mytab <- bind_rows(
  # Get Effects of all IVs from Panel models, with varying levels of controls
  mydat %>%
    filter(method == "Panel") %>%
    filter(predictor %in% c(
      "Rainfall", "% Average Acres Burned",
      "Increased Mobility", 
      "Increased Mobility within Cities",
      "Increased Mobility between Cities",
      "Decreased Mobility", 
      "Decreased Mobility within Cities",
      "Decreased Mobility between Cities")) %>%
    mutate(predictor = if_else(predictor %in% c("Rainfall", "% Average Acres Burned"),
                               "Damage", predictor)),
  # Get Effect of Damage from Matched Models, with varying levels of controls
  mydat %>%
    filter(method == "Matched for Disaster") %>%
    filter(predictor %in% c("Rainfall", "% Average Acres Burned")) %>%
    mutate(predictor = "Damage"),
  
  # Get Effect of Evacuation from Matched Models, with varying levels of controls
  mydat %>%
    filter(method == "Matched for Evacuation") %>%
    filter(predictor %in% c("Increased Mobility", 
                            "Increased Mobility within Cities",
                            "Increased Mobility between Cities")),
  
  # Get Effect of Sheltering from Matched Models, with varying levels of controls
  mydat %>%
    filter(method == "Matched for Sheltering") %>%
    filter(predictor %in% c("Decreased Mobility", 
                            "Decreased Mobility within Cities",
                            "Decreased Mobility between Cities")),
  
  
  # Get effect of Disasters OVER TIME from Matched DiD models
  mydat %>%
    filter(method == "DiD Matched for Disaster") %>%
    filter(predictor %in% c("% Average Acres Burned x Weeks",
                            "Rainfall x Weeks")) %>%
    mutate(predictor = "Damage"),
  
  
  mydat %>%
    filter(method == "DiD Matched for Evacuation") %>%
    filter(predictor %in% c("Increased Mobility x Weeks", 
                            "Increased Mobility within Cities x Weeks",
                            "Increased Mobility between Cities x Weeks")),
  
  mydat %>%
    filter(method == "DiD Matched for Sheltering") %>%
    filter(predictor %in% c("Decreased Mobility x Weeks", 
                            "Decreased Mobility within Cities x Weeks",
                            "Decreased Mobility between Cities x Weeks"))
)  %>%

  select(method, table, lag, case, outcome, number, predictor, beta, number) %>%
  mutate(predictor = str_remove(predictor, pattern = " x Weeks")) %>%
  mutate(predictor = str_replace_all(predictor, pattern = " ", replacement = "<br>")) %>%
  mutate(method = str_replace_all(method, pattern = " ", replacement = "<br>")) %>%
  mutate(estimate = str_remove(beta, "[(].*") %>% str_trim(side = "both") %>% as.numeric(),
         sig = str_extract(beta, pattern = "[*]+"),
         sig = if_else(is.na(sig), "", sig),
         alpha = if_else(sig %in% c("*", "**", "***"),
                                    "p < 0.05", "")) %>%
  mutate(method = case_when(
    str_detect(method, "DiD") ~ "DiD",
    str_detect(method, "DiD", negate = TRUE) & str_detect(method, "Matched") ~ "Matched",
    TRUE ~ "Panel"),
         method = factor(method, 
                         levels = c("Icon", "Effect", 
                                    "Panel", "Matched", "DiD"))) %>%
  group_by(case, method, predictor) %>%
  arrange(estimate) %>%
  mutate(order = 1:n()) %>%
  ungroup() %>%
  mutate(label = paste(estimate, sig, "<br>", number, sep = "")) %>%
  mutate(sorter = str_extract(number, "[-].*") %>% as.numeric(),
         sorter = if_else(abs(sorter) > 3, abs(sorter) - 3, abs(sorter)),
         sorter = factor(sorter)) %>%
   mutate(predictor = factor(
    predictor, levels = c("Damage",
                  "Increased<br>Mobility", 
                  "Increased<br>Mobility<br>between<br>Cities",
                  "Increased<br>Mobility<br>within<br>Cities",
                  "Decreased<br>Mobility", 
                  "Decreased<br>Mobility<br>between<br>Cities",
                  "Decreased<br>Mobility<br>within<br>Cities"))) %>%
  mutate(se = str_extract(beta, "[(].*.[)]") %>% 
           str_remove_all(pattern = "[(]|[)]") %>% as.numeric()) %>%
  mutate(lag = str_extract(lag, pattern = "[1-5]{1}") %>% as.numeric()) %>%
  mutate(num = str_remove(number, pattern = "Model A[1-7]{1}[-]"),
         mylab = paste(
           str_extract(table, pattern = "[0-7]{1}"),
           "-", 
           num, sep = "")) %>%
  # If three or more are sig, indicate yes
  group_by(case, method, predictor, mylab) %>%
  mutate(indicator = if_else(sum(alpha == "p < 0.05") >= 3, "pass", "fail")) %>%
  ungroup()

mylabelset <- mytab %>%
  group_by(case, method, predictor) %>%
  summarize(
    mylab = unique(mylab),
            count = 1:length(mylab),
            position = case_when(
              count == 1 ~ 1,
              count == 2 ~ 3,
              count == 3 ~ 5)) %>%
  left_join(by = c("case", "method", "predictor", "mylab","position" =  "lag"),
            y = mytab %>% select(case, method, predictor, mylab, indicator, sorter, lag, estimate))
```


```{r}
library(ggtext)

g1 <- mytab %>% 
  # Set global aesthetics
  ggplot() +
  geom_rect(data = mylabelset %>%
              filter(indicator == "pass"),
            mapping = aes(
              
              xmin = -Inf, xmax = Inf,
                          ymin = -Inf, ymax = Inf),
            fill = "black") +
  
  
  geom_hline(yintercept = 0, 
             linetype = "dashed", color = "darkgrey") +
  # Add 95% confidene intervals
  geom_linerange(
    data = mytab,
    mapping = aes(x = lag, y = estimate, 
                  group = mylab, 
                  color = sorter, alpha = indicator,
                  ymin = estimate - se * 1.96,
                  ymax = estimate + se * 1.96),
    size = 1.25,
    position = position_dodge2(width = 0.5)) +
  geom_point(data = mytab,
             mapping = aes(x = lag, y = estimate, 
                           group = mylab, label = mylab,
                           color = sorter, alpha = indicator),
             size = 3) +
  geom_ribbon(
    data = mytab,
    mapping = aes(x = lag, y = estimate, 
                  group = mylab, fill = sorter,
                  ymin = estimate - se * 1.96,
                  ymax = estimate + se * 1.96,
                  alpha = indicator),
    size = 1.25,   color = NA,
    position = position_dodge2(width = 0.5)) +
  geom_label(data = mylabelset,
             mapping = aes(x = position, y = estimate, 
                           group = mylab, fill = sorter, 
                           label = mylab, alpha = indicator),
             size = 4, alpha = 0.75, color = "black") +
  facet_grid(predictor~case+method, scales = "free") +
  coord_flip() +
  theme_bw(base_size = 14) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_markdown(),
        strip.text.x = element_markdown(),
        plot.subtitle = element_text(hjust = 0.5),
        panel.border = element_rect(color = "gray"),
        strip.text.y = element_markdown(angle = 0),
        plot.caption = element_text(hjust = 0.5)) +
  theme(legend.position = "bottom", legend.box = "horizontal",
        panel.spacing = unit(0.5, "cm"),
        plot.margin = margin(0,0,0,0,"cm"))  +
  scale_alpha_manual(
    breaks = c("pass", "fail"),
    labels = c("3 or more\nsignificant associations\n(p < 0.05)",
               "2 or fewer\nsignificant associations"),
    values = c(0.75, 0.15),
    guide = guide_legend(override.aes = list(
      fill = c("black", "grey"),
      alpha = c(0.75, 0.15)))) +
  viridis::scale_fill_viridis(option = "plasma", begin = 0.2, end = 0.8, discrete = TRUE) +
  viridis::scale_color_viridis(option = "plasma", begin = 0.2, end = 0.8, discrete = TRUE) +
  theme(plot.subtitle = element_text(hjust = 0.5)) +
  labs(
    subtitle = "Sensitivity Tests of Key Effects, measuring COVID-19 rates 1, 2, 3, 4, or 5 weeks after disaster.",
    x = "Number of Weeks Passed since Disaster",
    y = "Standardized Effect (Beta) on Logged COVID-19 Rates*",
    alpha = "How Consistently do Key Variables\nshow Significant Effects?") +
  guides(color = "none", fill = "none") +
  scale_y_continuous(n.breaks = 4)


ggsave(g1, filename = "viz/summary_table_time.png", 
       dpi = 500, height = 10, width = 12)
# i need to show variation by method and model(?)



```

